{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.6365829417675869, "calibration/batch_distribution_entropy": 0.6585998493860218, "calibration/confidence_entropy": 0.34533354478171396, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.49700134759958975, "calibration/mean_confidence": 0.7904389820626345, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1493.8, "completions/mean_length": 272.11005859375, "completions/mean_terminated_length": 222.85674438476562, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.09750684350728989, "learning_rate": 3.1249999999999997e-07, "loss": 0.0951, "num_tokens": 17630439.0, "reward": 0.6584686756134033, "reward_std": 0.4942072808742523, "rewards/accuracy_reward": 0.26572265625, "rewards/brier_reward": 0.40714969038963317, "rewards/confidence_uniqueness_reward": 0.4840377986431122, "rewards/format_reward": 0.6783203125, "rewards/frontier_aurc_reward": 0.2998352885246277, "rewards/frontier_coverage_1": 0.2998352885246277, "rewards/frontier_coverage_10": 0.2998352885246277, "rewards/frontier_coverage_15": 0.2998352885246277, "rewards/frontier_coverage_20": 0.2998352885246277, "rewards/frontier_coverage_25": 0.2998352885246277, "rewards/frontier_coverage_5": 0.2998352885246277, "rewards/frontier_ece_reward": 0.2998352885246277, "signal/accuracy_reward/centered_abs_mean": 0.275433349609375, "signal/accuracy_reward/group_std_mean": 0.31715606451034545, "signal/accuracy_reward/group_zero_std_frac": 0.25625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1377166748046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1377166748046875, "signal/advantage_abs_mean": 0.42427608370780945, "signal/advantage_pre_scale_abs_mean": 0.42427608370780945, "signal/advantage_pre_scale_std": 0.5014617919921875, "signal/advantage_std": 0.5014617919921875, "signal/brier_reward/centered_abs_mean": 0.3354613959789276, "signal/brier_reward/group_std_mean": 0.3796173930168152, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04193267449736595, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.04193267449736595, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2990226149559021, "signal/confidence_uniqueness_reward/group_std_mean": 0.3495690166950226, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03737782686948776, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03737782686948776, "signal/format_reward/centered_abs_mean": 0.4059326171875, "signal/format_reward/group_std_mean": 0.45510302782058715, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.20296630859375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.20296630859375, "signal/frontier_aurc_reward/centered_abs_mean": 0.292724135518074, "signal/frontier_aurc_reward/group_std_mean": 0.3436519503593445, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005239761807024479, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005239761807024479, "signal/frontier_coverage_1/centered_abs_mean": 0.292724135518074, "signal/frontier_coverage_1/group_std_mean": 0.3436519503593445, "signal/frontier_coverage_1/group_zero_std_frac": 0.003125, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005239761807024479, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005239761807024479, "signal/frontier_coverage_10/centered_abs_mean": 0.292724135518074, "signal/frontier_coverage_10/group_std_mean": 0.3436519503593445, "signal/frontier_coverage_10/group_zero_std_frac": 0.003125, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005239761807024479, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005239761807024479, "signal/frontier_coverage_15/centered_abs_mean": 0.292724135518074, "signal/frontier_coverage_15/group_std_mean": 0.3436519503593445, "signal/frontier_coverage_15/group_zero_std_frac": 0.003125, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005239761807024479, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005239761807024479, "signal/frontier_coverage_20/centered_abs_mean": 0.292724135518074, "signal/frontier_coverage_20/group_std_mean": 0.3436519503593445, "signal/frontier_coverage_20/group_zero_std_frac": 0.003125, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005239761807024479, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005239761807024479, "signal/frontier_coverage_25/centered_abs_mean": 0.292724135518074, "signal/frontier_coverage_25/group_std_mean": 0.3436519503593445, "signal/frontier_coverage_25/group_zero_std_frac": 0.003125, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005239761807024479, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005239761807024479, "signal/frontier_coverage_5/centered_abs_mean": 0.292724135518074, "signal/frontier_coverage_5/group_std_mean": 0.3436519503593445, "signal/frontier_coverage_5/group_zero_std_frac": 0.003125, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005239761807024479, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005239761807024479, "signal/frontier_ece_reward/centered_abs_mean": 0.292724135518074, "signal/frontier_ece_reward/group_std_mean": 0.3436519503593445, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03659051693975925, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03659051693975925, "step": 5 }, { "calibration/aurc": 0.6490185669552773, "calibration/batch_distribution_entropy": 0.6521431833905986, "calibration/confidence_entropy": 0.34190821981819597, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5192164657110803, "calibration/mean_confidence": 0.793153970465858, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0359375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1499.6, "completions/mean_length": 262.6380859375, "completions/mean_terminated_length": 215.18854370117188, "completions/min_length": 1.8, "completions/min_terminated_length": 1.8, "epoch": 0.032, "grad_norm": 0.043926581740379333, "learning_rate": 6.249999999999999e-07, "loss": 0.0953, "num_tokens": 35420205.0, "reward": 0.6759413719177246, "reward_std": 0.47106270790100097, "rewards/accuracy_reward": 0.25302734375, "rewards/brier_reward": 0.4121467649936676, "rewards/confidence_uniqueness_reward": 0.5148928165435791, "rewards/format_reward": 0.7166015625, "rewards/frontier_aurc_reward": 0.3006272315979004, "rewards/frontier_coverage_1": 0.3006272315979004, "rewards/frontier_coverage_10": 0.3006272315979004, "rewards/frontier_coverage_15": 0.3006272315979004, "rewards/frontier_coverage_20": 0.3006272315979004, "rewards/frontier_coverage_25": 0.3006272315979004, "rewards/frontier_coverage_5": 0.3006272315979004, "rewards/frontier_ece_reward": 0.3006272315979004, "signal/accuracy_reward/centered_abs_mean": 0.262005615234375, "signal/accuracy_reward/group_std_mean": 0.30882692337036133, "signal/accuracy_reward/group_zero_std_frac": 0.253125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1310028076171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1310028076171875, "signal/advantage_abs_mean": 0.3928821861743927, "signal/advantage_pre_scale_abs_mean": 0.3928821861743927, "signal/advantage_pre_scale_std": 0.4780964195728302, "signal/advantage_std": 0.4780964195728302, "signal/brier_reward/centered_abs_mean": 0.32577033042907716, "signal/brier_reward/group_std_mean": 0.3727036893367767, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.040721291303634645, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.040721291303634645, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.28297078013420107, "signal/confidence_uniqueness_reward/group_std_mean": 0.34029907584190366, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.035371347516775134, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.035371347516775134, "signal/format_reward/centered_abs_mean": 0.3770751953125, "signal/format_reward/group_std_mean": 0.43777642846107484, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.18853759765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.18853759765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.2814865827560425, "signal/frontier_aurc_reward/group_std_mean": 0.33588545918464663, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005038609728217125, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005038609728217125, "signal/frontier_coverage_1/centered_abs_mean": 0.2814865827560425, "signal/frontier_coverage_1/group_std_mean": 0.33588545918464663, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005038609728217125, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005038609728217125, "signal/frontier_coverage_10/centered_abs_mean": 0.2814865827560425, "signal/frontier_coverage_10/group_std_mean": 0.33588545918464663, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005038609728217125, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005038609728217125, "signal/frontier_coverage_15/centered_abs_mean": 0.2814865827560425, "signal/frontier_coverage_15/group_std_mean": 0.33588545918464663, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005038609728217125, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005038609728217125, "signal/frontier_coverage_20/centered_abs_mean": 0.2814865827560425, "signal/frontier_coverage_20/group_std_mean": 0.33588545918464663, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005038609728217125, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005038609728217125, "signal/frontier_coverage_25/centered_abs_mean": 0.2814865827560425, "signal/frontier_coverage_25/group_std_mean": 0.33588545918464663, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005038609728217125, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005038609728217125, "signal/frontier_coverage_5/centered_abs_mean": 0.2814865827560425, "signal/frontier_coverage_5/group_std_mean": 0.33588545918464663, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005038609728217125, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005038609728217125, "signal/frontier_ece_reward/centered_abs_mean": 0.2814865827560425, "signal/frontier_ece_reward/group_std_mean": 0.33588545918464663, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03518582284450531, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03518582284450531, "step": 10 }, { "calibration/aurc": 0.6122665169899899, "calibration/batch_distribution_entropy": 0.6434935543708379, "calibration/buffer_distribution_entropy": 0.665517862384225, "calibration/confidence_entropy": 0.3373251428514815, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.48297516178973776, "calibration/mean_confidence": 0.802906526845207, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0193359375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1476.6, "completions/mean_length": 208.11865234375, "completions/mean_terminated_length": 182.09054870605468, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 0.048, "grad_norm": 0.0721765011548996, "learning_rate": 9.374999999999999e-07, "loss": 0.0643, "num_tokens": 52600076.0, "reward": 0.8152373671531677, "reward_std": 0.36448066532611845, "rewards/accuracy_reward": 0.3287109375, "rewards/brier_reward": 0.521061384677887, "rewards/confidence_uniqueness_reward": 0.627258050441742, "rewards/format_reward": 0.8685546875, "rewards/frontier_aurc_reward": 0.29203636273741723, "rewards/frontier_coverage_1": 0.30767875611782075, "rewards/frontier_coverage_10": 0.30767875611782075, "rewards/frontier_coverage_15": 0.30767875611782075, "rewards/frontier_coverage_20": 0.30767875611782075, "rewards/frontier_coverage_25": 0.30767875611782075, "rewards/frontier_coverage_5": 0.30767875611782075, "rewards/frontier_ece_reward": 0.27833986282348633, "signal/accuracy_reward/centered_abs_mean": 0.2286376953125, "signal/accuracy_reward/group_std_mean": 0.2820782124996185, "signal/accuracy_reward/group_zero_std_frac": 0.275, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11431884765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11431884765625, "signal/advantage_abs_mean": 0.27974323034286497, "signal/advantage_pre_scale_abs_mean": 0.27974323034286497, "signal/advantage_pre_scale_std": 0.3752832055091858, "signal/advantage_std": 0.3752832055091858, "signal/brier_reward/centered_abs_mean": 0.28167834877967834, "signal/brier_reward/group_std_mean": 0.3381074070930481, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03520979359745979, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03520979359745979, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.20614430904388428, "signal/confidence_uniqueness_reward/group_std_mean": 0.2669346034526825, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025768038630485535, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.025768038630485535, "signal/format_reward/centered_abs_mean": 0.20537109375, "signal/format_reward/group_std_mean": 0.29972409307956693, "signal/format_reward/group_zero_std_frac": 0.078125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.102685546875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.102685546875, "signal/frontier_aurc_reward/centered_abs_mean": 0.22096354123204948, "signal/frontier_aurc_reward/group_std_mean": 0.2634817738085985, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0039552472357172515, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0039552472357172515, "signal/frontier_coverage_1/centered_abs_mean": 0.24031212329864501, "signal/frontier_coverage_1/group_std_mean": 0.2931499183177948, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004301586840301752, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004301586840301752, "signal/frontier_coverage_10/centered_abs_mean": 0.24031212329864501, "signal/frontier_coverage_10/group_std_mean": 0.2931499183177948, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004301586840301752, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004301586840301752, "signal/frontier_coverage_15/centered_abs_mean": 0.24031212329864501, "signal/frontier_coverage_15/group_std_mean": 0.2931499183177948, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004301586840301752, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004301586840301752, "signal/frontier_coverage_20/centered_abs_mean": 0.24031212329864501, "signal/frontier_coverage_20/group_std_mean": 0.2931499183177948, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004301586840301752, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004301586840301752, "signal/frontier_coverage_25/centered_abs_mean": 0.24031212329864501, "signal/frontier_coverage_25/group_std_mean": 0.2931499183177948, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004301586840301752, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004301586840301752, "signal/frontier_coverage_5/centered_abs_mean": 0.24031212329864501, "signal/frontier_coverage_5/group_std_mean": 0.2931499183177948, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004301586840301752, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004301586840301752, "signal/frontier_ece_reward/centered_abs_mean": 0.24247534871101378, "signal/frontier_ece_reward/group_std_mean": 0.2915202736854553, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030309418588876723, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030309418588876723, "step": 15 }, { "calibration/aurc": 0.5364586742043255, "calibration/batch_distribution_entropy": 0.7039374947589497, "calibration/buffer_distribution_entropy": 0.6644280848988587, "calibration/confidence_entropy": 0.3737305065676649, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.37717855912862763, "calibration/mean_confidence": 0.7704402478934718, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00419921875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1247.8, "completions/mean_length": 141.49462890625, "completions/mean_terminated_length": 135.61878967285156, "completions/min_length": 14.4, "completions/min_terminated_length": 14.4, "epoch": 0.064, "grad_norm": 0.02302715927362442, "learning_rate": 1e-06, "loss": 0.0148, "num_tokens": 68967381.0, "reward": 0.8545892715454102, "reward_std": 0.22467853426933287, "rewards/accuracy_reward": 0.3982421875, "rewards/brier_reward": 0.6125121355056763, "rewards/confidence_uniqueness_reward": 0.7326454758644104, "rewards/format_reward": 0.97578125, "rewards/frontier_aurc_reward": -0.007029248867183924, "rewards/frontier_coverage_1": 0.06456700265407563, "rewards/frontier_coverage_10": 0.06456700265407563, "rewards/frontier_coverage_15": 0.06456700265407563, "rewards/frontier_coverage_20": 0.06456700265407563, "rewards/frontier_coverage_25": 0.06456700265407563, "rewards/frontier_coverage_5": 0.06456700265407563, "rewards/frontier_ece_reward": -0.05900650816038251, "signal/accuracy_reward/centered_abs_mean": 0.22564697265625, "signal/accuracy_reward/group_std_mean": 0.27533276081085206, "signal/accuracy_reward/group_zero_std_frac": 0.30625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.112823486328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.112823486328125, "signal/advantage_abs_mean": 0.17218832969665526, "signal/advantage_pre_scale_abs_mean": 0.17218832969665526, "signal/advantage_pre_scale_std": 0.24152583181858062, "signal/advantage_std": 0.24152583181858062, "signal/brier_reward/centered_abs_mean": 0.23915229737758636, "signal/brier_reward/group_std_mean": 0.2956496119499207, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029894037172198296, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.029894037172198296, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.12884613871574402, "signal/confidence_uniqueness_reward/group_std_mean": 0.1678939491510391, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.016105767339468002, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.016105767339468002, "signal/format_reward/centered_abs_mean": 0.044775390625, "signal/format_reward/group_std_mean": 0.10225975811481476, "signal/format_reward/group_zero_std_frac": 0.51875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0223876953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0223876953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.005020592454820871, "signal/frontier_aurc_reward/group_std_mean": 0.00707492595538497, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.986860339064152e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.986860339064152e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.10616557449102401, "signal/frontier_coverage_1/group_std_mean": 0.1660414755344391, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001900363783352077, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001900363783352077, "signal/frontier_coverage_10/centered_abs_mean": 0.10616557449102401, "signal/frontier_coverage_10/group_std_mean": 0.1660414755344391, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001900363783352077, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001900363783352077, "signal/frontier_coverage_15/centered_abs_mean": 0.10616557449102401, "signal/frontier_coverage_15/group_std_mean": 0.1660414755344391, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001900363783352077, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001900363783352077, "signal/frontier_coverage_20/centered_abs_mean": 0.10616557449102401, "signal/frontier_coverage_20/group_std_mean": 0.1660414755344391, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001900363783352077, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001900363783352077, "signal/frontier_coverage_25/centered_abs_mean": 0.10616557449102401, "signal/frontier_coverage_25/group_std_mean": 0.1660414755344391, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001900363783352077, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001900363783352077, "signal/frontier_coverage_5/centered_abs_mean": 0.10616557449102401, "signal/frontier_coverage_5/group_std_mean": 0.1660414755344391, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001900363783352077, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001900363783352077, "signal/frontier_ece_reward/centered_abs_mean": 0.13162615597248079, "signal/frontier_ece_reward/group_std_mean": 0.16141263544559478, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.016453269496560098, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.016453269496560098, "step": 20 }, { "calibration/aurc": 0.6185683801548196, "calibration/batch_distribution_entropy": 0.7741915745085599, "calibration/buffer_distribution_entropy": 0.6910919502278804, "calibration/confidence_entropy": 0.44644902052135416, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4119077840049532, "calibration/mean_confidence": 0.728700664962728, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 1536.0, "completions/max_terminated_length": 804.6, "completions/mean_length": 121.98232421875, "completions/mean_terminated_length": 120.46162719726563, "completions/min_length": 29.6, "completions/min_terminated_length": 29.6, "epoch": 0.08, "grad_norm": 0.03972521796822548, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 85149632.0, "reward": 0.8964222550392151, "reward_std": 0.1864424616098404, "rewards/accuracy_reward": 0.43291015625, "rewards/brier_reward": 0.6608709096908569, "rewards/confidence_uniqueness_reward": 0.7922413229942322, "rewards/format_reward": 0.99423828125, "rewards/frontier_aurc_reward": -0.0059954837895929815, "rewards/frontier_coverage_1": 0.06537417620420456, "rewards/frontier_coverage_10": 0.06537417620420456, "rewards/frontier_coverage_15": 0.06537417620420456, "rewards/frontier_coverage_20": 0.06537417620420456, "rewards/frontier_coverage_25": 0.06537417620420456, "rewards/frontier_coverage_5": 0.06537417620420456, "rewards/frontier_ece_reward": -0.04563892595469952, "signal/accuracy_reward/centered_abs_mean": 0.212396240234375, "signal/accuracy_reward/group_std_mean": 0.2677969515323639, "signal/accuracy_reward/group_zero_std_frac": 0.28125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1061981201171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1061981201171875, "signal/advantage_abs_mean": 0.1454618066549301, "signal/advantage_pre_scale_abs_mean": 0.1454618066549301, "signal/advantage_pre_scale_std": 0.20510812401771544, "signal/advantage_std": 0.20510812401771544, "signal/brier_reward/centered_abs_mean": 0.2104009658098221, "signal/brier_reward/group_std_mean": 0.2618310570716858, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02630012072622776, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02630012072622776, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08538121879100799, "signal/confidence_uniqueness_reward/group_std_mean": 0.11254773437976837, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010672652348875999, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010672652348875999, "signal/format_reward/centered_abs_mean": 0.011029052734375, "signal/format_reward/group_std_mean": 0.029448001086711882, "signal/format_reward/group_zero_std_frac": 0.84375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0055145263671875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0055145263671875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0036648384761065246, "signal/frontier_aurc_reward/group_std_mean": 0.005378965474665165, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.560060792253353e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.560060792253353e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12216156721115112, "signal/frontier_coverage_1/group_std_mean": 0.1854743927717209, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021866919472813607, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021866919472813607, "signal/frontier_coverage_10/centered_abs_mean": 0.12216156721115112, "signal/frontier_coverage_10/group_std_mean": 0.1854743927717209, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021866919472813607, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021866919472813607, "signal/frontier_coverage_15/centered_abs_mean": 0.12216156721115112, "signal/frontier_coverage_15/group_std_mean": 0.1854743927717209, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021866919472813607, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021866919472813607, "signal/frontier_coverage_20/centered_abs_mean": 0.12216156721115112, "signal/frontier_coverage_20/group_std_mean": 0.1854743927717209, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021866919472813607, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021866919472813607, "signal/frontier_coverage_25/centered_abs_mean": 0.12216156721115112, "signal/frontier_coverage_25/group_std_mean": 0.1854743927717209, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021866919472813607, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021866919472813607, "signal/frontier_coverage_5/centered_abs_mean": 0.12216156721115112, "signal/frontier_coverage_5/group_std_mean": 0.1854743927717209, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021866919472813607, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021866919472813607, "signal/frontier_ece_reward/centered_abs_mean": 0.11495534181594849, "signal/frontier_ece_reward/group_std_mean": 0.14088748395442963, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.014369417726993561, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.014369417726993561, "step": 25 }, { "calibration/aurc": 0.6111911956970324, "calibration/batch_distribution_entropy": 0.8338461630160652, "calibration/buffer_distribution_entropy": 0.724930626699906, "calibration/confidence_entropy": 0.5217741949310019, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3064762667859151, "calibration/mean_confidence": 0.6442235468220104, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 1422.8, "completions/max_terminated_length": 868.2, "completions/mean_length": 122.53857421875, "completions/mean_terminated_length": 121.71004791259766, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.096, "grad_norm": 0.004610727075487375, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 101449035.0, "reward": 0.9245005130767823, "reward_std": 0.16271952390670777, "rewards/accuracy_reward": 0.46552734375, "rewards/brier_reward": 0.7045912981033325, "rewards/confidence_uniqueness_reward": 0.8080425143241883, "rewards/format_reward": 0.99658203125, "rewards/frontier_aurc_reward": -0.005250969249755144, "rewards/frontier_coverage_1": 0.07289079874753952, "rewards/frontier_coverage_10": 0.07289079874753952, "rewards/frontier_coverage_15": 0.07289079874753952, "rewards/frontier_coverage_20": 0.07289079874753952, "rewards/frontier_coverage_25": 0.07289079874753952, "rewards/frontier_coverage_5": 0.07289079874753952, "rewards/frontier_ece_reward": -0.026943267788738012, "signal/accuracy_reward/centered_abs_mean": 0.185174560546875, "signal/accuracy_reward/group_std_mean": 0.23991808891296387, "signal/accuracy_reward/group_zero_std_frac": 0.334375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0925872802734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0925872802734375, "signal/advantage_abs_mean": 0.12463378012180329, "signal/advantage_pre_scale_abs_mean": 0.12463378012180329, "signal/advantage_pre_scale_std": 0.1815657287836075, "signal/advantage_std": 0.1815657287836075, "signal/brier_reward/centered_abs_mean": 0.18598549365997313, "signal/brier_reward/group_std_mean": 0.2356630265712738, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02324818670749664, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02324818670749664, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08923951834440232, "signal/confidence_uniqueness_reward/group_std_mean": 0.11270735561847686, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01115493979305029, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01115493979305029, "signal/format_reward/centered_abs_mean": 0.006573486328125, "signal/format_reward/group_std_mean": 0.017989716865122317, "signal/format_reward/group_zero_std_frac": 0.903125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0032867431640625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0032867431640625, "signal/frontier_aurc_reward/centered_abs_mean": 0.00277226809412241, "signal/frontier_aurc_reward/group_std_mean": 0.004334397334605456, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.962359598721377e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.962359598721377e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14637718200683594, "signal/frontier_coverage_1/group_std_mean": 0.20774976015090943, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026201514527201654, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026201514527201654, "signal/frontier_coverage_10/centered_abs_mean": 0.14637718200683594, "signal/frontier_coverage_10/group_std_mean": 0.20774976015090943, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026201514527201654, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026201514527201654, "signal/frontier_coverage_15/centered_abs_mean": 0.14637718200683594, "signal/frontier_coverage_15/group_std_mean": 0.20774976015090943, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026201514527201654, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026201514527201654, "signal/frontier_coverage_20/centered_abs_mean": 0.14637718200683594, "signal/frontier_coverage_20/group_std_mean": 0.20774976015090943, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026201514527201654, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026201514527201654, "signal/frontier_coverage_25/centered_abs_mean": 0.14637718200683594, "signal/frontier_coverage_25/group_std_mean": 0.20774976015090943, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026201514527201654, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026201514527201654, "signal/frontier_coverage_5/centered_abs_mean": 0.14637718200683594, "signal/frontier_coverage_5/group_std_mean": 0.20774976015090943, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026201514527201654, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026201514527201654, "signal/frontier_ece_reward/centered_abs_mean": 0.09951903373003006, "signal/frontier_ece_reward/group_std_mean": 0.12258590757846832, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.012439879216253757, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.012439879216253757, "step": 30 }, { "calibration/aurc": 0.47581321016978, "calibration/batch_distribution_entropy": 0.8613389118246927, "calibration/buffer_distribution_entropy": 0.7676086231549225, "calibration/confidence_entropy": 0.5509334290904493, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.00821917808219178, "calibration/coverage@25%": 0.01643835616438356, "calibration/coverage@30%": 0.028180039138943246, "calibration/coverage@5%": 0.0, "calibration/ece": 0.17883227267157914, "calibration/mean_confidence": 0.5884126079190704, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 690.6, "completions/max_terminated_length": 476.2, "completions/mean_length": 130.32890625, "completions/mean_terminated_length": 130.19188842773437, "completions/min_length": 43.2, "completions/min_terminated_length": 43.2, "epoch": 0.112, "grad_norm": 0.008051756769418716, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 117893075.0, "reward": 0.9516644597053527, "reward_std": 0.1442788153886795, "rewards/accuracy_reward": 0.49208984375, "rewards/brier_reward": 0.7432172417640686, "rewards/confidence_uniqueness_reward": 0.8300640344619751, "rewards/format_reward": 0.9986328125, "rewards/frontier_aurc_reward": -0.004443310108035803, "rewards/frontier_coverage_1": 0.09440049231052398, "rewards/frontier_coverage_10": 0.09440049231052398, "rewards/frontier_coverage_15": 0.09440049231052398, "rewards/frontier_coverage_20": 0.09440049231052398, "rewards/frontier_coverage_25": 0.09440049231052398, "rewards/frontier_coverage_5": 0.09440049231052398, "rewards/frontier_ece_reward": -0.0033288702834397554, "signal/accuracy_reward/centered_abs_mean": 0.176458740234375, "signal/accuracy_reward/group_std_mean": 0.23186054825782776, "signal/accuracy_reward/group_zero_std_frac": 0.35, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0882293701171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0882293701171875, "signal/advantage_abs_mean": 0.11125468015670777, "signal/advantage_pre_scale_abs_mean": 0.11125468015670777, "signal/advantage_pre_scale_std": 0.16140751540660858, "signal/advantage_std": 0.16140751540660858, "signal/brier_reward/centered_abs_mean": 0.16677133738994598, "signal/brier_reward/group_std_mean": 0.21192941665649415, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020846417173743248, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020846417173743248, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09038377106189728, "signal/confidence_uniqueness_reward/group_std_mean": 0.11349603980779648, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01129797138273716, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01129797138273716, "signal/format_reward/centered_abs_mean": 0.00263671875, "signal/format_reward/group_std_mean": 0.007397671649232507, "signal/format_reward/group_zero_std_frac": 0.959375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001318359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001318359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019801823887974023, "signal/frontier_aurc_reward/group_std_mean": 0.0030699548777192833, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5445262619759886e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5445262619759886e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1819765478372574, "signal/frontier_coverage_1/group_std_mean": 0.24308145940303802, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032573801465332506, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032573801465332506, "signal/frontier_coverage_10/centered_abs_mean": 0.1819765478372574, "signal/frontier_coverage_10/group_std_mean": 0.24308145940303802, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032573801465332506, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032573801465332506, "signal/frontier_coverage_15/centered_abs_mean": 0.1819765478372574, "signal/frontier_coverage_15/group_std_mean": 0.24308145940303802, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032573801465332506, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032573801465332506, "signal/frontier_coverage_20/centered_abs_mean": 0.1819765478372574, "signal/frontier_coverage_20/group_std_mean": 0.24308145940303802, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032573801465332506, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032573801465332506, "signal/frontier_coverage_25/centered_abs_mean": 0.1819765478372574, "signal/frontier_coverage_25/group_std_mean": 0.24308145940303802, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032573801465332506, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032573801465332506, "signal/frontier_coverage_5/centered_abs_mean": 0.1819765478372574, "signal/frontier_coverage_5/group_std_mean": 0.24308145940303802, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032573801465332506, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032573801465332506, "signal/frontier_ece_reward/centered_abs_mean": 0.08115999251604081, "signal/frontier_ece_reward/group_std_mean": 0.10086136162281037, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.010144999064505101, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.010144999064505101, "step": 35 }, { "calibration/aurc": 0.500457568524228, "calibration/batch_distribution_entropy": 0.8839443566974549, "calibration/buffer_distribution_entropy": 0.8121122106093612, "calibration/confidence_entropy": 0.5729897651788818, "calibration/coverage@0%": 0.0023468137254901962, "calibration/coverage@1%": 0.0023468137254901962, "calibration/coverage@10%": 0.0023468137254901962, "calibration/coverage@15%": 0.0023468137254901962, "calibration/coverage@20%": 0.015246620908637426, "calibration/coverage@25%": 0.016027870908637425, "calibration/coverage@30%": 0.04967818891255132, "calibration/coverage@5%": 0.0023468137254901962, "calibration/ece": 0.1492628622651326, "calibration/mean_confidence": 0.4656056361196665, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 742.6, "completions/max_terminated_length": 503.8, "completions/mean_length": 142.25283203125, "completions/mean_terminated_length": 142.11691589355468, "completions/min_length": 53.4, "completions/min_terminated_length": 53.4, "epoch": 0.128, "grad_norm": 0.004143570549786091, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 134266416.0, "reward": 0.9478516936302185, "reward_std": 0.1235954999923706, "rewards/accuracy_reward": 0.470703125, "rewards/brier_reward": 0.7474552869796753, "rewards/confidence_uniqueness_reward": 0.8561806559562684, "rewards/format_reward": 0.99873046875, "rewards/frontier_aurc_reward": -0.0040137280710041525, "rewards/frontier_coverage_1": 0.11203746348619462, "rewards/frontier_coverage_10": 0.11203746348619462, "rewards/frontier_coverage_15": 0.11203746348619462, "rewards/frontier_coverage_20": 0.11203746348619462, "rewards/frontier_coverage_25": 0.11203746348619462, "rewards/frontier_coverage_5": 0.11203746348619462, "rewards/frontier_ece_reward": 0.005755350179970265, "signal/accuracy_reward/centered_abs_mean": 0.156494140625, "signal/accuracy_reward/group_std_mean": 0.20581283569335937, "signal/accuracy_reward/group_zero_std_frac": 0.415625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0782470703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0782470703125, "signal/advantage_abs_mean": 0.09602416306734085, "signal/advantage_pre_scale_abs_mean": 0.09602416306734085, "signal/advantage_pre_scale_std": 0.1393231213092804, "signal/advantage_std": 0.1393231213092804, "signal/brier_reward/centered_abs_mean": 0.16182340383529664, "signal/brier_reward/group_std_mean": 0.20294124484062195, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02022792547941208, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02022792547941208, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07690812945365906, "signal/confidence_uniqueness_reward/group_std_mean": 0.0960095539689064, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009613516181707383, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009613516181707383, "signal/format_reward/centered_abs_mean": 0.002447509765625, "signal/format_reward/group_std_mean": 0.00684524467214942, "signal/format_reward/group_zero_std_frac": 0.9625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012237548828125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012237548828125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013813054421916604, "signal/frontier_aurc_reward/group_std_mean": 0.002145401481539011, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4725366165512243e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4725366165512243e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.21806600689888, "signal/frontier_coverage_1/group_std_mean": 0.2785297632217407, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0039033814333379268, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0039033814333379268, "signal/frontier_coverage_10/centered_abs_mean": 0.21806600689888, "signal/frontier_coverage_10/group_std_mean": 0.2785297632217407, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0039033814333379268, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0039033814333379268, "signal/frontier_coverage_15/centered_abs_mean": 0.21806600689888, "signal/frontier_coverage_15/group_std_mean": 0.2785297632217407, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0039033814333379268, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0039033814333379268, "signal/frontier_coverage_20/centered_abs_mean": 0.21806600689888, "signal/frontier_coverage_20/group_std_mean": 0.2785297632217407, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0039033814333379268, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0039033814333379268, "signal/frontier_coverage_25/centered_abs_mean": 0.21806600689888, "signal/frontier_coverage_25/group_std_mean": 0.2785297632217407, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0039033814333379268, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0039033814333379268, "signal/frontier_coverage_5/centered_abs_mean": 0.21806600689888, "signal/frontier_coverage_5/group_std_mean": 0.2785297632217407, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0039033814333379268, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0039033814333379268, "signal/frontier_ece_reward/centered_abs_mean": 0.05941944047808647, "signal/frontier_ece_reward/group_std_mean": 0.07643859535455703, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007427430059760809, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007427430059760809, "step": 40 }, { "calibration/aurc": 0.3110369117140962, "calibration/batch_distribution_entropy": 0.9074994904593658, "calibration/buffer_distribution_entropy": 0.8566246693752007, "calibration/confidence_entropy": 0.5313453136877156, "calibration/coverage@0%": 0.00078125, "calibration/coverage@1%": 0.00078125, "calibration/coverage@10%": 0.03671875, "calibration/coverage@15%": 0.11328125, "calibration/coverage@20%": 0.224609375, "calibration/coverage@25%": 0.3406441108121331, "calibration/coverage@30%": 0.4304878608121331, "calibration/coverage@5%": 0.00078125, "calibration/ece": 0.23118559873124087, "calibration/mean_confidence": 0.39543612093607816, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1101.4, "completions/max_terminated_length": 424.2, "completions/mean_length": 146.07265625, "completions/mean_terminated_length": 145.52989501953124, "completions/min_length": 47.4, "completions/min_terminated_length": 47.4, "epoch": 0.144, "grad_norm": 0.0035824107471853495, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 150712632.0, "reward": 0.9979648113250732, "reward_std": 0.11293403208255767, "rewards/accuracy_reward": 0.5818359375, "rewards/brier_reward": 0.7256381988525391, "rewards/confidence_uniqueness_reward": 0.8713930606842041, "rewards/format_reward": 0.99853515625, "rewards/frontier_aurc_reward": -0.0034363477025181055, "rewards/frontier_coverage_1": 0.057438090443611145, "rewards/frontier_coverage_10": 0.057438090443611145, "rewards/frontier_coverage_15": 0.057438090443611145, "rewards/frontier_coverage_20": 0.057438090443611145, "rewards/frontier_coverage_25": 0.057438090443611145, "rewards/frontier_coverage_5": 0.057438090443611145, "rewards/frontier_ece_reward": 0.016344105079770088, "signal/accuracy_reward/centered_abs_mean": 0.156396484375, "signal/accuracy_reward/group_std_mean": 0.2017151564359665, "signal/accuracy_reward/group_zero_std_frac": 0.434375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0781982421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0781982421875, "signal/advantage_abs_mean": 0.0892390176653862, "signal/advantage_pre_scale_abs_mean": 0.0892390176653862, "signal/advantage_pre_scale_std": 0.12830377966165543, "signal/advantage_std": 0.12830377966165543, "signal/brier_reward/centered_abs_mean": 0.17482829093933105, "signal/brier_reward/group_std_mean": 0.21727988123893738, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021853536367416382, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021853536367416382, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06832201182842254, "signal/confidence_uniqueness_reward/group_std_mean": 0.08698472678661347, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008540251478552818, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008540251478552818, "signal/format_reward/centered_abs_mean": 0.002496337890625, "signal/format_reward/group_std_mean": 0.005241806851699948, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012481689453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012481689453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013276722747832537, "signal/frontier_aurc_reward/group_std_mean": 0.002048111497424543, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3765333025949076e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3765333025949076e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.25025501251220705, "signal/frontier_coverage_1/group_std_mean": 0.3121617794036865, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0044795645400881766, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0044795645400881766, "signal/frontier_coverage_10/centered_abs_mean": 0.25025501251220705, "signal/frontier_coverage_10/group_std_mean": 0.3121617794036865, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0044795645400881766, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0044795645400881766, "signal/frontier_coverage_15/centered_abs_mean": 0.25025501251220705, "signal/frontier_coverage_15/group_std_mean": 0.3121617794036865, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0044795645400881766, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0044795645400881766, "signal/frontier_coverage_20/centered_abs_mean": 0.25025501251220705, "signal/frontier_coverage_20/group_std_mean": 0.3121617794036865, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0044795645400881766, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0044795645400881766, "signal/frontier_coverage_25/centered_abs_mean": 0.25025501251220705, "signal/frontier_coverage_25/group_std_mean": 0.3121617794036865, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0044795645400881766, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0044795645400881766, "signal/frontier_coverage_5/centered_abs_mean": 0.25025501251220705, "signal/frontier_coverage_5/group_std_mean": 0.3121617794036865, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0044795645400881766, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0044795645400881766, "signal/frontier_ece_reward/centered_abs_mean": 0.047294650226831436, "signal/frontier_ece_reward/group_std_mean": 0.06447840631008148, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0059118312783539295, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0059118312783539295, "step": 45 }, { "calibration/aurc": 0.38340311213282363, "calibration/batch_distribution_entropy": 0.9228495953097084, "calibration/buffer_distribution_entropy": 0.8920677169770166, "calibration/confidence_entropy": 0.5123542013682538, "calibration/coverage@0%": 0.00546875, "calibration/coverage@1%": 0.00546875, "calibration/coverage@10%": 0.017993211839530333, "calibration/coverage@15%": 0.05277336105675147, "calibration/coverage@20%": 0.08248608732876712, "calibration/coverage@25%": 0.15869465508806263, "calibration/coverage@30%": 0.28567453522504893, "calibration/coverage@5%": 0.00546875, "calibration/ece": 0.12188082602439854, "calibration/mean_confidence": 0.3888530788440171, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 870.8, "completions/max_terminated_length": 385.8, "completions/mean_length": 149.441015625, "completions/mean_terminated_length": 149.17046203613282, "completions/min_length": 55.8, "completions/min_terminated_length": 55.8, "epoch": 0.16, "grad_norm": 0.00288645108230412, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 167263836.0, "reward": 0.9865847229957581, "reward_std": 0.11459582149982453, "rewards/accuracy_reward": 0.537890625, "rewards/brier_reward": 0.7459044456481934, "rewards/confidence_uniqueness_reward": 0.8809366464614868, "rewards/format_reward": 0.9990234375, "rewards/frontier_aurc_reward": -0.0034455065149813892, "rewards/frontier_coverage_1": 0.11904234737157822, "rewards/frontier_coverage_10": 0.11904234737157822, "rewards/frontier_coverage_15": 0.11904234737157822, "rewards/frontier_coverage_20": 0.11904234737157822, "rewards/frontier_coverage_25": 0.11904234737157822, "rewards/frontier_coverage_5": 0.11904234737157822, "rewards/frontier_ece_reward": 0.016392653435468675, "signal/accuracy_reward/centered_abs_mean": 0.1531494140625, "signal/accuracy_reward/group_std_mean": 0.20196100175380707, "signal/accuracy_reward/group_zero_std_frac": 0.425, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07657470703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07657470703125, "signal/advantage_abs_mean": 0.08890287727117538, "signal/advantage_pre_scale_abs_mean": 0.08890287727117538, "signal/advantage_pre_scale_std": 0.12889028787612916, "signal/advantage_std": 0.12889028787612916, "signal/brier_reward/centered_abs_mean": 0.17595805823802949, "signal/brier_reward/group_std_mean": 0.2205444246530533, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021994757279753686, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021994757279753686, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06260188668966293, "signal/confidence_uniqueness_reward/group_std_mean": 0.08365204632282257, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007825235836207867, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007825235836207867, "signal/format_reward/centered_abs_mean": 0.0018798828125, "signal/format_reward/group_std_mean": 0.0051879632286727425, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00093994140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.00156771473120898, "signal/frontier_aurc_reward/group_std_mean": 0.0023965310771018266, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8062092314939947e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8062092314939947e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2516119539737701, "signal/frontier_coverage_1/group_std_mean": 0.31743831038475034, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004503853805363178, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004503853805363178, "signal/frontier_coverage_10/centered_abs_mean": 0.2516119539737701, "signal/frontier_coverage_10/group_std_mean": 0.31743831038475034, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004503853805363178, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004503853805363178, "signal/frontier_coverage_15/centered_abs_mean": 0.2516119539737701, "signal/frontier_coverage_15/group_std_mean": 0.31743831038475034, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004503853805363178, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004503853805363178, "signal/frontier_coverage_20/centered_abs_mean": 0.2516119539737701, "signal/frontier_coverage_20/group_std_mean": 0.31743831038475034, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004503853805363178, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004503853805363178, "signal/frontier_coverage_25/centered_abs_mean": 0.2516119539737701, "signal/frontier_coverage_25/group_std_mean": 0.31743831038475034, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004503853805363178, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004503853805363178, "signal/frontier_coverage_5/centered_abs_mean": 0.2516119539737701, "signal/frontier_coverage_5/group_std_mean": 0.31743831038475034, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004503853805363178, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004503853805363178, "signal/frontier_ece_reward/centered_abs_mean": 0.04847268611192703, "signal/frontier_ece_reward/group_std_mean": 0.06642893105745315, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006059085763990879, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006059085763990879, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.581942546964942, "eval_calibration/batch_distribution_entropy": 0.8694379702142905, "eval_calibration/buffer_distribution_entropy": 0.9069787191727915, "eval_calibration/confidence_entropy": 0.5197218388047841, "eval_calibration/coverage@0%": 0.008064516129032258, "eval_calibration/coverage@1%": 0.008064516129032258, "eval_calibration/coverage@10%": 0.09400201612903225, "eval_calibration/coverage@15%": 0.09400201612903225, "eval_calibration/coverage@20%": 0.13306451612903225, "eval_calibration/coverage@25%": 0.16507056451612903, "eval_calibration/coverage@30%": 0.18850806451612903, "eval_calibration/coverage@5%": 0.008064516129032258, "eval_calibration/ece": 0.23922127016129036, "eval_calibration/mean_confidence": 0.4247958669354839, "eval_completions/clipped_ratio": 0.001953125, "eval_completions/max_length": 596.75, "eval_completions/max_terminated_length": 282.5, "eval_completions/mean_length": 156.9091453552246, "eval_completions/mean_terminated_length": 154.21026611328125, "eval_completions/min_length": 81.75, "eval_completions/min_terminated_length": 81.75, "eval_loss": 0.0, "eval_num_tokens": 167263836.0, "eval_reward": 0.9125984758138657, "eval_reward_std": 0.21672571077942848, "eval_rewards/accuracy_reward": 0.37109375, "eval_rewards/brier_reward": 0.7805570214986801, "eval_rewards/confidence_uniqueness_reward": 0.8489478528499603, "eval_rewards/format_reward": 0.998046875, "eval_rewards/frontier_aurc_reward": -0.0037371510406956077, "eval_rewards/frontier_coverage_1": 0.20891405642032623, "eval_rewards/frontier_coverage_10": 0.20891405642032623, "eval_rewards/frontier_coverage_15": 0.20891405642032623, "eval_rewards/frontier_coverage_20": 0.20891405642032623, "eval_rewards/frontier_coverage_25": 0.20891405642032623, "eval_rewards/frontier_coverage_5": 0.20891405642032623, "eval_rewards/frontier_ece_reward": 0.01575645850971341, "eval_runtime": 25.9605, "eval_samples_per_second": 19.26, "eval_signal/accuracy_reward/centered_abs_mean": 0.453125, "eval_signal/accuracy_reward/group_std_mean": 0.48237285763025284, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2265625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2265625, "eval_signal/advantage_abs_mean": 0.19144192337989807, "eval_signal/advantage_pre_scale_abs_mean": 0.19144192337989807, "eval_signal/advantage_pre_scale_std": 0.21519476547837257, "eval_signal/advantage_std": 0.21519476547837257, "eval_signal/brier_reward/centered_abs_mean": 0.1979050487279892, "eval_signal/brier_reward/group_std_mean": 0.24851922690868378, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02473813109099865, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02473813109099865, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06338747031986713, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07939925417304039, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007923433789983392, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007923433789983392, "eval_signal/format_reward/centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/group_std_mean": 0.011048543266952038, "eval_signal/format_reward/group_zero_std_frac": 0.9375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.00245083641493693, "eval_signal/frontier_aurc_reward/group_std_mean": 0.004007689480204135, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.386997170513496e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.386997170513496e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.39275022596120834, "eval_signal/frontier_coverage_1/group_std_mean": 0.4746975228190422, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.007030228851363063, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.007030228851363063, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.39275022596120834, "eval_signal/frontier_coverage_10/group_std_mean": 0.4746975228190422, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.007030228851363063, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.007030228851363063, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.39275022596120834, "eval_signal/frontier_coverage_15/group_std_mean": 0.4746975228190422, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007030228851363063, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.007030228851363063, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.39275022596120834, "eval_signal/frontier_coverage_20/group_std_mean": 0.4746975228190422, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.007030228851363063, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.007030228851363063, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.39275022596120834, "eval_signal/frontier_coverage_25/group_std_mean": 0.4746975228190422, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.007030228851363063, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.007030228851363063, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.39275022596120834, "eval_signal/frontier_coverage_5/group_std_mean": 0.4746975228190422, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.007030228851363063, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.007030228851363063, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.05723079666495323, "eval_signal/frontier_ece_reward/group_std_mean": 0.08887772634625435, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007153849583119154, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007153849583119154, "eval_steps_per_second": 0.154, "step": 50 }, { "calibration/aurc": 0.41148073571425414, "calibration/batch_distribution_entropy": 0.9665159815171023, "calibration/buffer_distribution_entropy": 0.9138723263746295, "calibration/confidence_entropy": 0.49411856057329295, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0265625, "calibration/coverage@20%": 0.0421875, "calibration/coverage@25%": 0.07421875, "calibration/coverage@30%": 0.173828125, "calibration/coverage@5%": 0.0, "calibration/ece": 0.16889198610565095, "calibration/mean_confidence": 0.4860307061398975, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.4, "completions/max_terminated_length": 455.4, "completions/mean_length": 155.9359375, "completions/mean_terminated_length": 155.9359375, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 0.176, "grad_norm": 0.00218537007458508, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 184097740.0, "reward": 0.9889800906181335, "reward_std": 0.11108050644397735, "rewards/accuracy_reward": 0.53056640625, "rewards/brier_reward": 0.7610553503036499, "rewards/confidence_uniqueness_reward": 0.8953521609306335, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0036368840374052525, "rewards/frontier_coverage_1": 0.13700859993696213, "rewards/frontier_coverage_10": 0.13700859993696213, "rewards/frontier_coverage_15": 0.13700859993696213, "rewards/frontier_coverage_20": 0.13700859993696213, "rewards/frontier_coverage_25": 0.13700859993696213, "rewards/frontier_coverage_5": 0.13700859993696213, "rewards/frontier_ece_reward": 0.018704849109053612, "signal/accuracy_reward/centered_abs_mean": 0.137249755859375, "signal/accuracy_reward/group_std_mean": 0.18366769552230836, "signal/accuracy_reward/group_zero_std_frac": 0.465625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0686248779296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0686248779296875, "signal/advantage_abs_mean": 0.084813691675663, "signal/advantage_pre_scale_abs_mean": 0.084813691675663, "signal/advantage_pre_scale_std": 0.12855230122804642, "signal/advantage_std": 0.12855230122804642, "signal/brier_reward/centered_abs_mean": 0.1753113955259323, "signal/brier_reward/group_std_mean": 0.22086445689201356, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02191392444074154, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02191392444074154, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.046444494277238846, "signal/confidence_uniqueness_reward/group_std_mean": 0.05864086300134659, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005805561784654856, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005805561784654856, "signal/format_reward/centered_abs_mean": 0.001312255859375, "signal/format_reward/group_std_mean": 0.0035306816454976795, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023333648685365917, "signal/frontier_aurc_reward/group_std_mean": 0.003552594967186451, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.176723159616813e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.176723159616813e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2309940814971924, "signal/frontier_coverage_1/group_std_mean": 0.2958366394042969, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004134793765842915, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004134793765842915, "signal/frontier_coverage_10/centered_abs_mean": 0.2309940814971924, "signal/frontier_coverage_10/group_std_mean": 0.2958366394042969, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004134793765842915, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004134793765842915, "signal/frontier_coverage_15/centered_abs_mean": 0.2309940814971924, "signal/frontier_coverage_15/group_std_mean": 0.2958366394042969, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004134793765842915, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004134793765842915, "signal/frontier_coverage_20/centered_abs_mean": 0.2309940814971924, "signal/frontier_coverage_20/group_std_mean": 0.2958366394042969, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004134793765842915, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004134793765842915, "signal/frontier_coverage_25/centered_abs_mean": 0.2309940814971924, "signal/frontier_coverage_25/group_std_mean": 0.2958366394042969, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004134793765842915, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004134793765842915, "signal/frontier_coverage_5/centered_abs_mean": 0.2309940814971924, "signal/frontier_coverage_5/group_std_mean": 0.2958366394042969, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004134793765842915, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004134793765842915, "signal/frontier_ece_reward/centered_abs_mean": 0.056712330877780916, "signal/frontier_ece_reward/group_std_mean": 0.07508723661303521, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0070890413597226145, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0070890413597226145, "step": 55 }, { "calibration/aurc": 0.35110534394587606, "calibration/batch_distribution_entropy": 0.9429500544063417, "calibration/buffer_distribution_entropy": 0.9228942481105176, "calibration/confidence_entropy": 0.44029570897924264, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0410958904109589, "calibration/coverage@15%": 0.06105675146771037, "calibration/coverage@20%": 0.11779216609589041, "calibration/coverage@25%": 0.133446978962818, "calibration/coverage@30%": 0.3190290178571429, "calibration/coverage@5%": 0.0, "calibration/ece": 0.1277301548740215, "calibration/mean_confidence": 0.5812738931017611, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1073.6, "completions/max_terminated_length": 403.4, "completions/mean_length": 157.32744140625, "completions/mean_terminated_length": 156.7888427734375, "completions/min_length": 55.2, "completions/min_terminated_length": 55.2, "epoch": 0.192, "grad_norm": 0.0026588267646729946, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 200523589.0, "reward": 0.9892240047454834, "reward_std": 0.11632921844720841, "rewards/accuracy_reward": 0.5302734375, "rewards/brier_reward": 0.7743942737579346, "rewards/confidence_uniqueness_reward": 0.8843107342720031, "rewards/format_reward": 0.9990234375, "rewards/frontier_aurc_reward": -0.0036770137492567303, "rewards/frontier_coverage_1": 0.1299908846616745, "rewards/frontier_coverage_10": 0.1299908846616745, "rewards/frontier_coverage_15": 0.1299908846616745, "rewards/frontier_coverage_20": 0.1299908846616745, "rewards/frontier_coverage_25": 0.1299908846616745, "rewards/frontier_coverage_5": 0.1299908846616745, "rewards/frontier_ece_reward": 0.026738068088889122, "signal/accuracy_reward/centered_abs_mean": 0.1340087890625, "signal/accuracy_reward/group_std_mean": 0.17480367571115493, "signal/accuracy_reward/group_zero_std_frac": 0.50625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06700439453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06700439453125, "signal/advantage_abs_mean": 0.08978550434112549, "signal/advantage_pre_scale_abs_mean": 0.08978550434112549, "signal/advantage_pre_scale_std": 0.1373380169272423, "signal/advantage_std": 0.1373380169272423, "signal/brier_reward/centered_abs_mean": 0.17695107460021972, "signal/brier_reward/group_std_mean": 0.22387023866176606, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022118884325027465, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.022118884325027465, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.060887254774570465, "signal/confidence_uniqueness_reward/group_std_mean": 0.07694388255476951, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007610906846821308, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007610906846821308, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_std_mean": 0.00552427158690989, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032003792934119702, "signal/frontier_aurc_reward/group_std_mean": 0.004674886912107467, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.7286787341581655e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.7286787341581655e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19972616136074067, "signal/frontier_coverage_1/group_std_mean": 0.25967652797698976, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003575098142027855, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003575098142027855, "signal/frontier_coverage_10/centered_abs_mean": 0.19972616136074067, "signal/frontier_coverage_10/group_std_mean": 0.25967652797698976, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003575098142027855, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003575098142027855, "signal/frontier_coverage_15/centered_abs_mean": 0.19972616136074067, "signal/frontier_coverage_15/group_std_mean": 0.25967652797698976, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003575098142027855, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003575098142027855, "signal/frontier_coverage_20/centered_abs_mean": 0.19972616136074067, "signal/frontier_coverage_20/group_std_mean": 0.25967652797698976, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003575098142027855, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003575098142027855, "signal/frontier_coverage_25/centered_abs_mean": 0.19972616136074067, "signal/frontier_coverage_25/group_std_mean": 0.25967652797698976, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003575098142027855, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003575098142027855, "signal/frontier_coverage_5/centered_abs_mean": 0.19972616136074067, "signal/frontier_coverage_5/group_std_mean": 0.25967652797698976, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003575098142027855, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003575098142027855, "signal/frontier_ece_reward/centered_abs_mean": 0.06527650877833366, "signal/frontier_ece_reward/group_std_mean": 0.08204113095998763, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008159563597291707, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008159563597291707, "step": 60 }, { "calibration/aurc": 0.29950539905285317, "calibration/batch_distribution_entropy": 0.8949305749179007, "calibration/buffer_distribution_entropy": 0.9254220054486639, "calibration/confidence_entropy": 0.3947662440191003, "calibration/coverage@0%": 0.011328125, "calibration/coverage@1%": 0.011328125, "calibration/coverage@10%": 0.11328125, "calibration/coverage@15%": 0.155078125, "calibration/coverage@20%": 0.29765625, "calibration/coverage@25%": 0.43984375, "calibration/coverage@30%": 0.565625, "calibration/coverage@5%": 0.01953125, "calibration/ece": 0.14129778085249506, "calibration/mean_confidence": 0.6134717121758806, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 625.0, "completions/max_terminated_length": 445.8, "completions/mean_length": 157.33720703125, "completions/mean_terminated_length": 157.20322265625, "completions/min_length": 53.6, "completions/min_terminated_length": 53.6, "epoch": 0.208, "grad_norm": 0.006198456976562738, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 217166946.0, "reward": 1.0055761933326721, "reward_std": 0.12328730970621109, "rewards/accuracy_reward": 0.56953125, "rewards/brier_reward": 0.7748962879180908, "rewards/confidence_uniqueness_reward": 0.8710411190986633, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.0033608878031373022, "rewards/frontier_coverage_1": 0.10492202192544937, "rewards/frontier_coverage_10": 0.10492202192544937, "rewards/frontier_coverage_15": 0.10492202192544937, "rewards/frontier_coverage_20": 0.10492202192544937, "rewards/frontier_coverage_25": 0.10492202192544937, "rewards/frontier_coverage_5": 0.10492202192544937, "rewards/frontier_ece_reward": 0.034004238247871396, "signal/accuracy_reward/centered_abs_mean": 0.13963623046875, "signal/accuracy_reward/group_std_mean": 0.186881947517395, "signal/accuracy_reward/group_zero_std_frac": 0.459375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.069818115234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.069818115234375, "signal/advantage_abs_mean": 0.09355712085962295, "signal/advantage_pre_scale_abs_mean": 0.09355712085962295, "signal/advantage_pre_scale_std": 0.14598494470119477, "signal/advantage_std": 0.14598494470119477, "signal/brier_reward/centered_abs_mean": 0.18199315667152405, "signal/brier_reward/group_std_mean": 0.2308054745197296, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022749144583940506, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.022749144583940506, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08029745370149613, "signal/confidence_uniqueness_reward/group_std_mean": 0.10032221227884293, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010037181712687016, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010037181712687016, "signal/format_reward/centered_abs_mean": 0.00150146484375, "signal/format_reward/group_std_mean": 0.004083108762279153, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000750732421875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000750732421875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0036689775064587594, "signal/frontier_aurc_reward/group_std_mean": 0.0054342994466423985, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.567469317815266e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.567469317815266e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18830927908420564, "signal/frontier_coverage_1/group_std_mean": 0.25073177814483644, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00337073584087193, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00337073584087193, "signal/frontier_coverage_10/centered_abs_mean": 0.18830927908420564, "signal/frontier_coverage_10/group_std_mean": 0.25073177814483644, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00337073584087193, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00337073584087193, "signal/frontier_coverage_15/centered_abs_mean": 0.18830927908420564, "signal/frontier_coverage_15/group_std_mean": 0.25073177814483644, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00337073584087193, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00337073584087193, "signal/frontier_coverage_20/centered_abs_mean": 0.18830927908420564, "signal/frontier_coverage_20/group_std_mean": 0.25073177814483644, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00337073584087193, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00337073584087193, "signal/frontier_coverage_25/centered_abs_mean": 0.18830927908420564, "signal/frontier_coverage_25/group_std_mean": 0.25073177814483644, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00337073584087193, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00337073584087193, "signal/frontier_coverage_5/centered_abs_mean": 0.18830927908420564, "signal/frontier_coverage_5/group_std_mean": 0.25073177814483644, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00337073584087193, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00337073584087193, "signal/frontier_ece_reward/centered_abs_mean": 0.06522954106330872, "signal/frontier_ece_reward/group_std_mean": 0.08197070807218551, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00815369263291359, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00815369263291359, "step": 65 }, { "calibration/aurc": 0.32836925315578147, "calibration/batch_distribution_entropy": 0.9369705991630376, "calibration/buffer_distribution_entropy": 0.9288580259601347, "calibration/confidence_entropy": 0.39559583362140976, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.08088307240704501, "calibration/coverage@15%": 0.16330494740704501, "calibration/coverage@20%": 0.30007262108610566, "calibration/coverage@25%": 0.45020104574363995, "calibration/coverage@30%": 0.5385235750978474, "calibration/coverage@5%": 0.00859375, "calibration/ece": 0.15778186841818104, "calibration/mean_confidence": 0.5168032271358796, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 1333.4, "completions/max_terminated_length": 457.4, "completions/mean_length": 155.1294921875, "completions/mean_terminated_length": 154.18482055664063, "completions/min_length": 69.2, "completions/min_terminated_length": 69.2, "epoch": 0.224, "grad_norm": 0.0021975021809339523, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 233908656.0, "reward": 0.9999201774597168, "reward_std": 0.11371375173330307, "rewards/accuracy_reward": 0.54541015625, "rewards/brier_reward": 0.7730230927467346, "rewards/confidence_uniqueness_reward": 0.8957045555114747, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0036701176781207324, "rewards/frontier_coverage_1": 0.14951011687517166, "rewards/frontier_coverage_10": 0.14951011687517166, "rewards/frontier_coverage_15": 0.14951011687517166, "rewards/frontier_coverage_20": 0.14951011687517166, "rewards/frontier_coverage_25": 0.14951011687517166, "rewards/frontier_coverage_5": 0.14951011687517166, "rewards/frontier_ece_reward": 0.023793780989944936, "signal/accuracy_reward/centered_abs_mean": 0.129779052734375, "signal/accuracy_reward/group_std_mean": 0.17483537197113036, "signal/accuracy_reward/group_zero_std_frac": 0.490625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0648895263671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0648895263671875, "signal/advantage_abs_mean": 0.0853941187262535, "signal/advantage_pre_scale_abs_mean": 0.0853941187262535, "signal/advantage_pre_scale_std": 0.1344393938779831, "signal/advantage_std": 0.1344393938779831, "signal/brier_reward/centered_abs_mean": 0.1850574344396591, "signal/brier_reward/group_std_mean": 0.23437364101409913, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02313217930495739, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02313217930495739, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0607163667678833, "signal/confidence_uniqueness_reward/group_std_mean": 0.07633327543735505, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007589545845985412, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007589545845985412, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.003866990143433213, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0035295146983116863, "signal/frontier_aurc_reward/group_std_mean": 0.005266764014959335, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.317830993793905e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.317830993793905e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20874929428100586, "signal/frontier_coverage_1/group_std_mean": 0.2724692106246948, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003736612340435386, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003736612340435386, "signal/frontier_coverage_10/centered_abs_mean": 0.20874929428100586, "signal/frontier_coverage_10/group_std_mean": 0.2724692106246948, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003736612340435386, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003736612340435386, "signal/frontier_coverage_15/centered_abs_mean": 0.20874929428100586, "signal/frontier_coverage_15/group_std_mean": 0.2724692106246948, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003736612340435386, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003736612340435386, "signal/frontier_coverage_20/centered_abs_mean": 0.20874929428100586, "signal/frontier_coverage_20/group_std_mean": 0.2724692106246948, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003736612340435386, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003736612340435386, "signal/frontier_coverage_25/centered_abs_mean": 0.20874929428100586, "signal/frontier_coverage_25/group_std_mean": 0.2724692106246948, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003736612340435386, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003736612340435386, "signal/frontier_coverage_5/centered_abs_mean": 0.20874929428100586, "signal/frontier_coverage_5/group_std_mean": 0.2724692106246948, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003736612340435386, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003736612340435386, "signal/frontier_ece_reward/centered_abs_mean": 0.056064750999212265, "signal/frontier_ece_reward/group_std_mean": 0.07063852250576019, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007008093874901533, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007008093874901533, "step": 70 }, { "calibration/aurc": 0.3625857217361375, "calibration/batch_distribution_entropy": 0.9289938394921892, "calibration/buffer_distribution_entropy": 0.9335108002509734, "calibration/confidence_entropy": 0.40780732318505564, "calibration/coverage@0%": 0.0125, "calibration/coverage@1%": 0.0125, "calibration/coverage@10%": 0.10625, "calibration/coverage@15%": 0.188671875, "calibration/coverage@20%": 0.28954791462818, "calibration/coverage@25%": 0.31848779965753427, "calibration/coverage@30%": 0.3583766511741683, "calibration/coverage@5%": 0.0125, "calibration/ece": 0.18316088370397798, "calibration/mean_confidence": 0.5532851800314939, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1052.6, "completions/max_terminated_length": 372.2, "completions/mean_length": 155.53154296875, "completions/mean_terminated_length": 155.12725524902345, "completions/min_length": 71.6, "completions/min_terminated_length": 71.6, "epoch": 0.24, "grad_norm": 0.0017300838371738791, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 250752979.0, "reward": 1.0221168398857117, "reward_std": 0.11391993910074234, "rewards/accuracy_reward": 0.595703125, "rewards/brier_reward": 0.7676220774650574, "rewards/confidence_uniqueness_reward": 0.9071184039115906, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.002962377923540771, "rewards/frontier_coverage_1": 0.10575190596282483, "rewards/frontier_coverage_10": 0.10575190596282483, "rewards/frontier_coverage_15": 0.10575190596282483, "rewards/frontier_coverage_20": 0.10575190596282483, "rewards/frontier_coverage_25": 0.10575190596282483, "rewards/frontier_coverage_5": 0.10575190596282483, "rewards/frontier_ece_reward": 0.030115915276110173, "signal/accuracy_reward/centered_abs_mean": 0.14744873046875, "signal/accuracy_reward/group_std_mean": 0.1908715397119522, "signal/accuracy_reward/group_zero_std_frac": 0.46875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.073724365234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.073724365234375, "signal/advantage_abs_mean": 0.08909272402524948, "signal/advantage_pre_scale_abs_mean": 0.08909272402524948, "signal/advantage_pre_scale_std": 0.13636419773101807, "signal/advantage_std": 0.13636419773101807, "signal/brier_reward/centered_abs_mean": 0.1880528837442398, "signal/brier_reward/group_std_mean": 0.2371793121099472, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023506610468029977, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.023506610468029977, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.053541189432144164, "signal/confidence_uniqueness_reward/group_std_mean": 0.06544121354818344, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0066926486790180205, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0066926486790180205, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032780596986413, "signal/frontier_aurc_reward/group_std_mean": 0.004835722874850035, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.867726795258932e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.867726795258932e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.21991788744926452, "signal/frontier_coverage_1/group_std_mean": 0.28908875584602356, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003936530090868473, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003936530090868473, "signal/frontier_coverage_10/centered_abs_mean": 0.21991788744926452, "signal/frontier_coverage_10/group_std_mean": 0.28908875584602356, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003936530090868473, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003936530090868473, "signal/frontier_coverage_15/centered_abs_mean": 0.21991788744926452, "signal/frontier_coverage_15/group_std_mean": 0.28908875584602356, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003936530090868473, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003936530090868473, "signal/frontier_coverage_20/centered_abs_mean": 0.21991788744926452, "signal/frontier_coverage_20/group_std_mean": 0.28908875584602356, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003936530090868473, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003936530090868473, "signal/frontier_coverage_25/centered_abs_mean": 0.21991788744926452, "signal/frontier_coverage_25/group_std_mean": 0.28908875584602356, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003936530090868473, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003936530090868473, "signal/frontier_coverage_5/centered_abs_mean": 0.21991788744926452, "signal/frontier_coverage_5/group_std_mean": 0.28908875584602356, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003936530090868473, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003936530090868473, "signal/frontier_ece_reward/centered_abs_mean": 0.05272270888090134, "signal/frontier_ece_reward/group_std_mean": 0.06555544286966324, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006590338610112667, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006590338610112667, "step": 75 }, { "calibration/aurc": 0.2991545526086894, "calibration/batch_distribution_entropy": 0.917100104209035, "calibration/buffer_distribution_entropy": 0.9366676884977749, "calibration/confidence_entropy": 0.374372114525325, "calibration/coverage@0%": 0.003515625, "calibration/coverage@1%": 0.003515625, "calibration/coverage@10%": 0.21171875, "calibration/coverage@15%": 0.30390625, "calibration/coverage@20%": 0.38515625, "calibration/coverage@25%": 0.48828125, "calibration/coverage@30%": 0.59453125, "calibration/coverage@5%": 0.05703125, "calibration/ece": 0.13661148577754403, "calibration/mean_confidence": 0.5016744681145004, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 618.8, "completions/max_terminated_length": 421.6, "completions/mean_length": 152.70751953125, "completions/mean_terminated_length": 152.5725067138672, "completions/min_length": 72.2, "completions/min_terminated_length": 72.2, "epoch": 0.256, "grad_norm": 0.004246581345796585, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 267371520.0, "reward": 1.0221374034881592, "reward_std": 0.10346800982952117, "rewards/accuracy_reward": 0.58359375, "rewards/brier_reward": 0.7830183148384094, "rewards/confidence_uniqueness_reward": 0.9105902791023255, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002707870095036924, "rewards/frontier_coverage_1": 0.14056457132101058, "rewards/frontier_coverage_10": 0.14056457132101058, "rewards/frontier_coverage_15": 0.14056457132101058, "rewards/frontier_coverage_20": 0.14056457132101058, "rewards/frontier_coverage_25": 0.14056457132101058, "rewards/frontier_coverage_5": 0.14056457132101058, "rewards/frontier_ece_reward": 0.029511995241045953, "signal/accuracy_reward/centered_abs_mean": 0.1346435546875, "signal/accuracy_reward/group_std_mean": 0.17472511231899263, "signal/accuracy_reward/group_zero_std_frac": 0.50625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06732177734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06732177734375, "signal/advantage_abs_mean": 0.07942169755697251, "signal/advantage_pre_scale_abs_mean": 0.07942169755697251, "signal/advantage_pre_scale_std": 0.12556920498609542, "signal/advantage_std": 0.12556920498609542, "signal/brier_reward/centered_abs_mean": 0.18081169128417968, "signal/brier_reward/group_std_mean": 0.22884117662906647, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02260146141052246, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02260146141052246, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05464339852333069, "signal/confidence_uniqueness_reward/group_std_mean": 0.06681963428854942, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006830424815416336, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006830424815416336, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003052409226074815, "signal/frontier_aurc_reward/group_std_mean": 0.00458022365346551, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4638121946481986e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4638121946481986e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22503261864185334, "signal/frontier_coverage_1/group_std_mean": 0.29037556052207947, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004028083616867661, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004028083616867661, "signal/frontier_coverage_10/centered_abs_mean": 0.22503261864185334, "signal/frontier_coverage_10/group_std_mean": 0.29037556052207947, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004028083616867661, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004028083616867661, "signal/frontier_coverage_15/centered_abs_mean": 0.22503261864185334, "signal/frontier_coverage_15/group_std_mean": 0.29037556052207947, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004028083616867661, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004028083616867661, "signal/frontier_coverage_20/centered_abs_mean": 0.22503261864185334, "signal/frontier_coverage_20/group_std_mean": 0.29037556052207947, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004028083616867661, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004028083616867661, "signal/frontier_coverage_25/centered_abs_mean": 0.22503261864185334, "signal/frontier_coverage_25/group_std_mean": 0.29037556052207947, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004028083616867661, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004028083616867661, "signal/frontier_coverage_5/centered_abs_mean": 0.22503261864185334, "signal/frontier_coverage_5/group_std_mean": 0.29037556052207947, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004028083616867661, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004028083616867661, "signal/frontier_ece_reward/centered_abs_mean": 0.04379315301775932, "signal/frontier_ece_reward/group_std_mean": 0.056234460324048996, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005474144127219915, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005474144127219915, "step": 80 }, { "calibration/aurc": 0.363860036835348, "calibration/batch_distribution_entropy": 0.9570702198192571, "calibration/buffer_distribution_entropy": 0.9406893351280925, "calibration/confidence_entropy": 0.4103928165246762, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.092578125, "calibration/coverage@15%": 0.16953125, "calibration/coverage@20%": 0.246875, "calibration/coverage@25%": 0.2890625, "calibration/coverage@30%": 0.352734375, "calibration/coverage@5%": 0.0, "calibration/ece": 0.16451098025568126, "calibration/mean_confidence": 0.4967080330168347, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 592.6, "completions/max_terminated_length": 364.2, "completions/mean_length": 159.1400390625, "completions/mean_terminated_length": 158.87093811035157, "completions/min_length": 65.8, "completions/min_terminated_length": 65.8, "epoch": 0.272, "grad_norm": 0.006795101799070835, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 283966810.0, "reward": 1.0148004293441772, "reward_std": 0.10174374878406525, "rewards/accuracy_reward": 0.56806640625, "rewards/brier_reward": 0.7736626982688903, "rewards/confidence_uniqueness_reward": 0.9271513104438782, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0029262469615787268, "rewards/frontier_coverage_1": 0.14347197711467743, "rewards/frontier_coverage_10": 0.14347197711467743, "rewards/frontier_coverage_15": 0.14347197711467743, "rewards/frontier_coverage_20": 0.14347197711467743, "rewards/frontier_coverage_25": 0.14347197711467743, "rewards/frontier_coverage_5": 0.14347197711467743, "rewards/frontier_ece_reward": 0.024034282192587852, "signal/accuracy_reward/centered_abs_mean": 0.126641845703125, "signal/accuracy_reward/group_std_mean": 0.1672067701816559, "signal/accuracy_reward/group_zero_std_frac": 0.51875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0633209228515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0633209228515625, "signal/advantage_abs_mean": 0.0784185141324997, "signal/advantage_pre_scale_abs_mean": 0.0784185141324997, "signal/advantage_pre_scale_std": 0.12320059090852738, "signal/advantage_std": 0.12320059090852738, "signal/brier_reward/centered_abs_mean": 0.1773090809583664, "signal/brier_reward/group_std_mean": 0.2246845543384552, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0221636351197958, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0221636351197958, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.044048906117677686, "signal/confidence_uniqueness_reward/group_std_mean": 0.055270757526159286, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005506113264709711, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005506113264709711, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002721975650638342, "signal/frontier_aurc_reward/group_std_mean": 0.004123077914118767, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.872336139669642e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.872336139669642e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2235881805419922, "signal/frontier_coverage_1/group_std_mean": 0.2899299919605255, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004002228379249573, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004002228379249573, "signal/frontier_coverage_10/centered_abs_mean": 0.2235881805419922, "signal/frontier_coverage_10/group_std_mean": 0.2899299919605255, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004002228379249573, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004002228379249573, "signal/frontier_coverage_15/centered_abs_mean": 0.2235881805419922, "signal/frontier_coverage_15/group_std_mean": 0.2899299919605255, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004002228379249573, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004002228379249573, "signal/frontier_coverage_20/centered_abs_mean": 0.2235881805419922, "signal/frontier_coverage_20/group_std_mean": 0.2899299919605255, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004002228379249573, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004002228379249573, "signal/frontier_coverage_25/centered_abs_mean": 0.2235881805419922, "signal/frontier_coverage_25/group_std_mean": 0.2899299919605255, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004002228379249573, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004002228379249573, "signal/frontier_coverage_5/centered_abs_mean": 0.2235881805419922, "signal/frontier_coverage_5/group_std_mean": 0.2899299919605255, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004002228379249573, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004002228379249573, "signal/frontier_ece_reward/centered_abs_mean": 0.040379713475704196, "signal/frontier_ece_reward/group_std_mean": 0.05107036232948303, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0050474641844630245, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0050474641844630245, "step": 85 }, { "calibration/aurc": 0.36422554783602823, "calibration/batch_distribution_entropy": 0.9481445874016821, "calibration/buffer_distribution_entropy": 0.9444843703339039, "calibration/confidence_entropy": 0.4098080527850782, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.061328125, "calibration/coverage@15%": 0.07109375, "calibration/coverage@20%": 0.10078125, "calibration/coverage@25%": 0.11484375, "calibration/coverage@30%": 0.2484375, "calibration/coverage@5%": 0.048046875, "calibration/ece": 0.15095390849412324, "calibration/mean_confidence": 0.5471646791933227, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1045.4, "completions/max_terminated_length": 389.8, "completions/mean_length": 155.80673828125, "completions/mean_terminated_length": 155.402783203125, "completions/min_length": 74.4, "completions/min_terminated_length": 74.4, "epoch": 0.288, "grad_norm": 0.005369322374463081, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 300520447.0, "reward": 1.0132103562355042, "reward_std": 0.10634560137987137, "rewards/accuracy_reward": 0.56318359375, "rewards/brier_reward": 0.7749075770378113, "rewards/confidence_uniqueness_reward": 0.9406777501106263, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0030640484765172005, "rewards/frontier_coverage_1": 0.13456785976886748, "rewards/frontier_coverage_10": 0.13456785976886748, "rewards/frontier_coverage_15": 0.13456785976886748, "rewards/frontier_coverage_20": 0.13456785976886748, "rewards/frontier_coverage_25": 0.13456785976886748, "rewards/frontier_coverage_5": 0.13456785976886748, "rewards/frontier_ece_reward": 0.023743505217134954, "signal/accuracy_reward/centered_abs_mean": 0.137371826171875, "signal/accuracy_reward/group_std_mean": 0.18289859890937804, "signal/accuracy_reward/group_zero_std_frac": 0.471875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0686859130859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0686859130859375, "signal/advantage_abs_mean": 0.08051378279924393, "signal/advantage_pre_scale_abs_mean": 0.08051378279924393, "signal/advantage_pre_scale_std": 0.12663117051124573, "signal/advantage_std": 0.12663117051124573, "signal/brier_reward/centered_abs_mean": 0.1794063478708267, "signal/brier_reward/group_std_mean": 0.22867206931114198, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02242579348385334, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02242579348385334, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03527098894119263, "signal/confidence_uniqueness_reward/group_std_mean": 0.044175655394792554, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004408873617649078, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004408873617649078, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.003065389487892389, "signal/frontier_aurc_reward/group_std_mean": 0.004888421203941107, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4870470921741796e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4870470921741796e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22263219356536865, "signal/frontier_coverage_1/group_std_mean": 0.2907342195510864, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003985116025432944, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003985116025432944, "signal/frontier_coverage_10/centered_abs_mean": 0.22263219356536865, "signal/frontier_coverage_10/group_std_mean": 0.2907342195510864, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003985116025432944, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003985116025432944, "signal/frontier_coverage_15/centered_abs_mean": 0.22263219356536865, "signal/frontier_coverage_15/group_std_mean": 0.2907342195510864, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003985116025432944, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003985116025432944, "signal/frontier_coverage_20/centered_abs_mean": 0.22263219356536865, "signal/frontier_coverage_20/group_std_mean": 0.2907342195510864, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003985116025432944, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003985116025432944, "signal/frontier_coverage_25/centered_abs_mean": 0.22263219356536865, "signal/frontier_coverage_25/group_std_mean": 0.2907342195510864, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003985116025432944, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003985116025432944, "signal/frontier_coverage_5/centered_abs_mean": 0.22263219356536865, "signal/frontier_coverage_5/group_std_mean": 0.2907342195510864, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003985116025432944, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003985116025432944, "signal/frontier_ece_reward/centered_abs_mean": 0.04063198119401932, "signal/frontier_ece_reward/group_std_mean": 0.05170249417424202, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005078997649252415, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005078997649252415, "step": 90 }, { "calibration/aurc": 0.29253903941152404, "calibration/batch_distribution_entropy": 0.9048455136474193, "calibration/buffer_distribution_entropy": 0.9462939197873748, "calibration/confidence_entropy": 0.39088382746890016, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.1375, "calibration/coverage@15%": 0.1875, "calibration/coverage@20%": 0.257421875, "calibration/coverage@25%": 0.36213460127201563, "calibration/coverage@30%": 0.5430260824363993, "calibration/coverage@5%": 0.0, "calibration/ece": 0.14318374226956557, "calibration/mean_confidence": 0.587406877153499, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 419.4, "completions/max_terminated_length": 419.4, "completions/mean_length": 160.600390625, "completions/mean_terminated_length": 160.600390625, "completions/min_length": 75.6, "completions/min_terminated_length": 75.6, "epoch": 0.304, "grad_norm": 0.00885615311563015, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 317094947.0, "reward": 1.028617262840271, "reward_std": 0.09624975174665451, "rewards/accuracy_reward": 0.59638671875, "rewards/brier_reward": 0.7746265411376954, "rewards/confidence_uniqueness_reward": 0.9423886299133301, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.003213143954053521, "rewards/frontier_coverage_1": 0.12373490929603577, "rewards/frontier_coverage_10": 0.12373490929603577, "rewards/frontier_coverage_15": 0.12373490929603577, "rewards/frontier_coverage_20": 0.12373490929603577, "rewards/frontier_coverage_25": 0.12373490929603577, "rewards/frontier_coverage_5": 0.12373490929603577, "rewards/frontier_ece_reward": 0.021694989316165446, "signal/accuracy_reward/centered_abs_mean": 0.119732666015625, "signal/accuracy_reward/group_std_mean": 0.15952616930007935, "signal/accuracy_reward/group_zero_std_frac": 0.5375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0598663330078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0598663330078125, "signal/advantage_abs_mean": 0.07218151390552521, "signal/advantage_pre_scale_abs_mean": 0.07218151390552521, "signal/advantage_pre_scale_std": 0.11681736111640931, "signal/advantage_std": 0.11681736111640931, "signal/brier_reward/centered_abs_mean": 0.17017588913440704, "signal/brier_reward/group_std_mean": 0.21725533604621888, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02127198614180088, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02127198614180088, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03307611271739006, "signal/confidence_uniqueness_reward/group_std_mean": 0.04140571765601635, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004134514089673758, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004134514089673758, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002763637388125062, "signal/frontier_aurc_reward/group_std_mean": 0.0042175163049250845, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.9469107761979106e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.9469107761979106e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2136286973953247, "signal/frontier_coverage_1/group_std_mean": 0.2769513875246048, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038239536806941032, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038239536806941032, "signal/frontier_coverage_10/centered_abs_mean": 0.2136286973953247, "signal/frontier_coverage_10/group_std_mean": 0.2769513875246048, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038239536806941032, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038239536806941032, "signal/frontier_coverage_15/centered_abs_mean": 0.2136286973953247, "signal/frontier_coverage_15/group_std_mean": 0.2769513875246048, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038239536806941032, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038239536806941032, "signal/frontier_coverage_20/centered_abs_mean": 0.2136286973953247, "signal/frontier_coverage_20/group_std_mean": 0.2769513875246048, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038239536806941032, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038239536806941032, "signal/frontier_coverage_25/centered_abs_mean": 0.2136286973953247, "signal/frontier_coverage_25/group_std_mean": 0.2769513875246048, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038239536806941032, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038239536806941032, "signal/frontier_coverage_5/centered_abs_mean": 0.2136286973953247, "signal/frontier_coverage_5/group_std_mean": 0.2769513875246048, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038239536806941032, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038239536806941032, "signal/frontier_ece_reward/centered_abs_mean": 0.03785905465483665, "signal/frontier_ece_reward/group_std_mean": 0.04792718142271042, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004732381831854582, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004732381831854582, "step": 95 }, { "calibration/aurc": 0.24936746292749862, "calibration/batch_distribution_entropy": 0.898710830523522, "calibration/buffer_distribution_entropy": 0.9460732940566577, "calibration/confidence_entropy": 0.3851945609629142, "calibration/coverage@0%": 0.015264187866927592, "calibration/coverage@1%": 0.015264187866927592, "calibration/coverage@10%": 0.2279705846379648, "calibration/coverage@15%": 0.3795835371819961, "calibration/coverage@20%": 0.45266481164383554, "calibration/coverage@25%": 0.5382422639432486, "calibration/coverage@30%": 0.6234313845401174, "calibration/coverage@5%": 0.10597251100782779, "calibration/ece": 0.1376192145922726, "calibration/mean_confidence": 0.6140883395295473, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 647.4, "completions/max_terminated_length": 489.6, "completions/mean_length": 164.50380859375, "completions/mean_terminated_length": 164.3696044921875, "completions/min_length": 68.4, "completions/min_terminated_length": 68.4, "epoch": 0.32, "grad_norm": 0.003183668712154031, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 333868170.0, "reward": 1.037460708618164, "reward_std": 0.08647488206624984, "rewards/accuracy_reward": 0.60302734375, "rewards/brier_reward": 0.8063218474388123, "rewards/confidence_uniqueness_reward": 0.944849681854248, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0029652828816324472, "rewards/frontier_coverage_1": 0.1294178381562233, "rewards/frontier_coverage_10": 0.1294178381562233, "rewards/frontier_coverage_15": 0.1294178381562233, "rewards/frontier_coverage_20": 0.1294178381562233, "rewards/frontier_coverage_25": 0.1294178381562233, "rewards/frontier_coverage_5": 0.1294178381562233, "rewards/frontier_ece_reward": 0.02797740586102009, "signal/accuracy_reward/centered_abs_mean": 0.090289306640625, "signal/accuracy_reward/group_std_mean": 0.12296751439571381, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0451446533203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0451446533203125, "signal/advantage_abs_mean": 0.06498634666204453, "signal/advantage_pre_scale_abs_mean": 0.06498634666204453, "signal/advantage_pre_scale_std": 0.11145332753658295, "signal/advantage_std": 0.11145332753658295, "signal/brier_reward/centered_abs_mean": 0.1528707653284073, "signal/brier_reward/group_std_mean": 0.19931194186210632, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01910884566605091, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01910884566605091, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030894938856363297, "signal/confidence_uniqueness_reward/group_std_mean": 0.03997356966137886, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003861867357045412, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003861867357045412, "signal/format_reward/centered_abs_mean": 0.001123046875, "signal/format_reward/group_std_mean": 0.0029782545287162067, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005615234375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0005615234375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002860796870663762, "signal/frontier_aurc_reward/group_std_mean": 0.0042446996085345745, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.120826244819909e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.120826244819909e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17833363115787507, "signal/frontier_coverage_1/group_std_mean": 0.23469134867191316, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031921718269586562, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031921718269586562, "signal/frontier_coverage_10/centered_abs_mean": 0.17833363115787507, "signal/frontier_coverage_10/group_std_mean": 0.23469134867191316, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031921718269586562, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031921718269586562, "signal/frontier_coverage_15/centered_abs_mean": 0.17833363115787507, "signal/frontier_coverage_15/group_std_mean": 0.23469134867191316, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031921718269586562, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031921718269586562, "signal/frontier_coverage_20/centered_abs_mean": 0.17833363115787507, "signal/frontier_coverage_20/group_std_mean": 0.23469134867191316, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031921718269586562, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031921718269586562, "signal/frontier_coverage_25/centered_abs_mean": 0.17833363115787507, "signal/frontier_coverage_25/group_std_mean": 0.23469134867191316, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031921718269586562, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031921718269586562, "signal/frontier_coverage_5/centered_abs_mean": 0.17833363115787507, "signal/frontier_coverage_5/group_std_mean": 0.23469134867191316, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031921718269586562, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031921718269586562, "signal/frontier_ece_reward/centered_abs_mean": 0.037183419615030286, "signal/frontier_ece_reward/group_std_mean": 0.04669267162680626, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004647927451878786, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004647927451878786, "step": 100 }, { "epoch": 0.32, "eval_calibration/aurc": 0.5361608562879745, "eval_calibration/batch_distribution_entropy": 0.8548767773066339, "eval_calibration/buffer_distribution_entropy": 0.9456258884469577, "eval_calibration/confidence_entropy": 0.3856233495527517, "eval_calibration/coverage@0%": 0.0234375, "eval_calibration/coverage@1%": 0.0234375, "eval_calibration/coverage@10%": 0.0234375, "eval_calibration/coverage@15%": 0.0859375, "eval_calibration/coverage@20%": 0.1015625, "eval_calibration/coverage@25%": 0.1171875, "eval_calibration/coverage@30%": 0.125, "eval_calibration/coverage@5%": 0.0234375, "eval_calibration/ece": 0.2574609375, "eval_calibration/mean_confidence": 0.5250390625, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 313.0, "eval_completions/max_terminated_length": 313.0, "eval_completions/mean_length": 166.9853172302246, "eval_completions/mean_terminated_length": 166.9853172302246, "eval_completions/min_length": 93.75, "eval_completions/min_terminated_length": 93.75, "eval_loss": 0.0, "eval_num_tokens": 333868170.0, "eval_reward": 0.9285456091165543, "eval_reward_std": 0.2409592606127262, "eval_rewards/accuracy_reward": 0.3984375, "eval_rewards/brier_reward": 0.7509243190288544, "eval_rewards/confidence_uniqueness_reward": 0.890869140625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.004297213105019182, "eval_rewards/frontier_coverage_1": 0.20684602111577988, "eval_rewards/frontier_coverage_10": 0.20684602111577988, "eval_rewards/frontier_coverage_15": 0.20684602111577988, "eval_rewards/frontier_coverage_20": 0.20684602111577988, "eval_rewards/frontier_coverage_25": 0.20684602111577988, "eval_rewards/frontier_coverage_5": 0.20684602111577988, "eval_rewards/frontier_ece_reward": 0.015714747074525803, "eval_runtime": 17.5982, "eval_samples_per_second": 28.412, "eval_signal/accuracy_reward/centered_abs_mean": 0.466064453125, "eval_signal/accuracy_reward/group_std_mean": 0.489865705370903, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2330322265625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2330322265625, "eval_signal/advantage_abs_mean": 0.21615619584918022, "eval_signal/advantage_pre_scale_abs_mean": 0.21615619584918022, "eval_signal/advantage_pre_scale_std": 0.23838016018271446, "eval_signal/advantage_std": 0.23838016018271446, "eval_signal/brier_reward/centered_abs_mean": 0.26976919919252396, "eval_signal/brier_reward/group_std_mean": 0.3225868046283722, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.033721149899065495, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.033721149899065495, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0496673583984375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06072596646845341, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0062084197998046875, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0062084197998046875, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005205620895139873, "eval_signal/frontier_aurc_reward/group_std_mean": 0.008366801775991917, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.31806080188835e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.31806080188835e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.36348968744277954, "eval_signal/frontier_coverage_1/group_std_mean": 0.453485868871212, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00650646525900811, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00650646525900811, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.36348968744277954, "eval_signal/frontier_coverage_10/group_std_mean": 0.453485868871212, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00650646525900811, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00650646525900811, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.36348968744277954, "eval_signal/frontier_coverage_15/group_std_mean": 0.453485868871212, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00650646525900811, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00650646525900811, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.36348968744277954, "eval_signal/frontier_coverage_20/group_std_mean": 0.453485868871212, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00650646525900811, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00650646525900811, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.36348968744277954, "eval_signal/frontier_coverage_25/group_std_mean": 0.453485868871212, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00650646525900811, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00650646525900811, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.36348968744277954, "eval_signal/frontier_coverage_5/group_std_mean": 0.453485868871212, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00650646525900811, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00650646525900811, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.05878347251564264, "eval_signal/frontier_ece_reward/group_std_mean": 0.07752788066864014, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00734793406445533, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00734793406445533, "eval_steps_per_second": 0.227, "step": 100 }, { "calibration/aurc": 0.3170759678648111, "calibration/batch_distribution_entropy": 0.9268662687477087, "calibration/buffer_distribution_entropy": 0.9480795484053474, "calibration/confidence_entropy": 0.4027125187855171, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.008203125, "calibration/coverage@15%": 0.06001657662082514, "calibration/coverage@20%": 0.1137394093811395, "calibration/coverage@25%": 0.34513675712180747, "calibration/coverage@30%": 0.5636135498526522, "calibration/coverage@5%": 0.0, "calibration/ece": 0.13889236064162716, "calibration/mean_confidence": 0.5570489445717209, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 922.4, "completions/max_terminated_length": 491.0, "completions/mean_length": 168.534375, "completions/mean_terminated_length": 168.26749267578126, "completions/min_length": 63.2, "completions/min_terminated_length": 63.2, "epoch": 0.336, "grad_norm": 0.011471702717244625, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 350316394.0, "reward": 1.0325323581695556, "reward_std": 0.09860386103391647, "rewards/accuracy_reward": 0.5978515625, "rewards/brier_reward": 0.7879295349121094, "rewards/confidence_uniqueness_reward": 0.9501477599143981, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0029000297654420137, "rewards/frontier_coverage_1": 0.12584335058927537, "rewards/frontier_coverage_10": 0.12584335058927537, "rewards/frontier_coverage_15": 0.12584335058927537, "rewards/frontier_coverage_20": 0.12584335058927537, "rewards/frontier_coverage_25": 0.12584335058927537, "rewards/frontier_coverage_5": 0.12584335058927537, "rewards/frontier_ece_reward": 0.02501910924911499, "signal/accuracy_reward/centered_abs_mean": 0.11759033203125, "signal/accuracy_reward/group_std_mean": 0.1579001486301422, "signal/accuracy_reward/group_zero_std_frac": 0.540625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.058795166015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.058795166015625, "signal/advantage_abs_mean": 0.07503360360860825, "signal/advantage_pre_scale_abs_mean": 0.07503360360860825, "signal/advantage_pre_scale_std": 0.12452945411205292, "signal/advantage_std": 0.12452945411205292, "signal/brier_reward/centered_abs_mean": 0.16325247883796692, "signal/brier_reward/group_std_mean": 0.20753192603588105, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020406559854745866, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020406559854745866, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02659556120634079, "signal/confidence_uniqueness_reward/group_std_mean": 0.03490939736366272, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033244451507925986, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033244451507925986, "signal/format_reward/centered_abs_mean": 0.000909423828125, "signal/format_reward/group_std_mean": 0.002030306123197079, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004547119140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004547119140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027562730945646765, "signal/frontier_aurc_reward/group_std_mean": 0.004085430596023798, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.933728851028718e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.933728851028718e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18382967710494996, "signal/frontier_coverage_1/group_std_mean": 0.24163539111614227, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032905511558055877, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032905511558055877, "signal/frontier_coverage_10/centered_abs_mean": 0.18382967710494996, "signal/frontier_coverage_10/group_std_mean": 0.24163539111614227, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032905511558055877, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032905511558055877, "signal/frontier_coverage_15/centered_abs_mean": 0.18382967710494996, "signal/frontier_coverage_15/group_std_mean": 0.24163539111614227, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032905511558055877, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032905511558055877, "signal/frontier_coverage_20/centered_abs_mean": 0.18382967710494996, "signal/frontier_coverage_20/group_std_mean": 0.24163539111614227, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032905511558055877, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032905511558055877, "signal/frontier_coverage_25/centered_abs_mean": 0.18382967710494996, "signal/frontier_coverage_25/group_std_mean": 0.24163539111614227, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032905511558055877, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032905511558055877, "signal/frontier_coverage_5/centered_abs_mean": 0.18382967710494996, "signal/frontier_coverage_5/group_std_mean": 0.24163539111614227, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032905511558055877, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032905511558055877, "signal/frontier_ece_reward/centered_abs_mean": 0.034998999536037446, "signal/frontier_ece_reward/group_std_mean": 0.04407136589288711, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004374874942004681, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004374874942004681, "step": 105 }, { "calibration/aurc": 0.3232366665037657, "calibration/batch_distribution_entropy": 0.8812064271537494, "calibration/buffer_distribution_entropy": 0.9553933684063487, "calibration/confidence_entropy": 0.35902633675891904, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.16585815190802347, "calibration/coverage@15%": 0.24369801859099804, "calibration/coverage@20%": 0.3855461105675147, "calibration/coverage@25%": 0.44301614481409, "calibration/coverage@30%": 0.5180536020058708, "calibration/coverage@5%": 0.018003913894324854, "calibration/ece": 0.13547729630579644, "calibration/mean_confidence": 0.49073865731901967, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 630.8, "completions/max_terminated_length": 398.8, "completions/mean_length": 170.27373046875, "completions/mean_terminated_length": 170.14059448242188, "completions/min_length": 78.4, "completions/min_terminated_length": 78.4, "epoch": 0.352, "grad_norm": 0.012238552793860435, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 367320413.0, "reward": 1.001007616519928, "reward_std": 0.09876850098371506, "rewards/accuracy_reward": 0.530078125, "rewards/brier_reward": 0.7849451780319214, "rewards/confidence_uniqueness_reward": 0.943973433971405, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0035563561599701644, "rewards/frontier_coverage_1": 0.16559779942035674, "rewards/frontier_coverage_10": 0.16559779942035674, "rewards/frontier_coverage_15": 0.16559779942035674, "rewards/frontier_coverage_20": 0.16559779942035674, "rewards/frontier_coverage_25": 0.16559779942035674, "rewards/frontier_coverage_5": 0.16559779942035674, "rewards/frontier_ece_reward": 0.018229612335562705, "signal/accuracy_reward/centered_abs_mean": 0.1173095703125, "signal/accuracy_reward/group_std_mean": 0.15779185593128203, "signal/accuracy_reward/group_zero_std_frac": 0.5375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05865478515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05865478515625, "signal/advantage_abs_mean": 0.07470797747373581, "signal/advantage_pre_scale_abs_mean": 0.07470797747373581, "signal/advantage_pre_scale_std": 0.1232941284775734, "signal/advantage_std": 0.1232941284775734, "signal/brier_reward/centered_abs_mean": 0.16526894867420197, "signal/brier_reward/group_std_mean": 0.21336513757705688, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020658618584275246, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020658618584275246, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.031228836625814438, "signal/confidence_uniqueness_reward/group_std_mean": 0.04048001915216446, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0039036045782268047, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0039036045782268047, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0033744067884981634, "signal/frontier_aurc_reward/group_std_mean": 0.0051686098799109455, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.040188018232584e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.040188018232584e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1909989595413208, "signal/frontier_coverage_1/group_std_mean": 0.2494662880897522, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034188813529908656, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034188813529908656, "signal/frontier_coverage_10/centered_abs_mean": 0.1909989595413208, "signal/frontier_coverage_10/group_std_mean": 0.2494662880897522, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034188813529908656, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034188813529908656, "signal/frontier_coverage_15/centered_abs_mean": 0.1909989595413208, "signal/frontier_coverage_15/group_std_mean": 0.2494662880897522, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034188813529908656, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034188813529908656, "signal/frontier_coverage_20/centered_abs_mean": 0.1909989595413208, "signal/frontier_coverage_20/group_std_mean": 0.2494662880897522, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034188813529908656, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034188813529908656, "signal/frontier_coverage_25/centered_abs_mean": 0.1909989595413208, "signal/frontier_coverage_25/group_std_mean": 0.2494662880897522, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034188813529908656, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034188813529908656, "signal/frontier_coverage_5/centered_abs_mean": 0.1909989595413208, "signal/frontier_coverage_5/group_std_mean": 0.2494662880897522, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034188813529908656, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034188813529908656, "signal/frontier_ece_reward/centered_abs_mean": 0.030147189274430275, "signal/frontier_ece_reward/group_std_mean": 0.03832725360989571, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0037683986593037844, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0037683986593037844, "step": 110 }, { "calibration/aurc": 0.3797556873444166, "calibration/batch_distribution_entropy": 0.8650275868499007, "calibration/buffer_distribution_entropy": 0.9613778439528119, "calibration/confidence_entropy": 0.3575837773714898, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.061328125, "calibration/coverage@15%": 0.1, "calibration/coverage@20%": 0.26171875, "calibration/coverage@25%": 0.326953125, "calibration/coverage@30%": 0.375, "calibration/coverage@5%": 0.0078125, "calibration/ece": 0.1910163477538498, "calibration/mean_confidence": 0.5746932088470622, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 897.8, "completions/max_terminated_length": 462.4, "completions/mean_length": 172.3744140625, "completions/mean_terminated_length": 172.1084747314453, "completions/min_length": 67.8, "completions/min_terminated_length": 67.8, "epoch": 0.368, "grad_norm": 0.022365767508745193, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 384150999.0, "reward": 1.0263174772262573, "reward_std": 0.09078062623739243, "rewards/accuracy_reward": 0.578125, "rewards/brier_reward": 0.8040992617607117, "rewards/confidence_uniqueness_reward": 0.9418166399002075, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0037387159187346696, "rewards/frontier_coverage_1": 0.1589544117450714, "rewards/frontier_coverage_10": 0.1589544117450714, "rewards/frontier_coverage_15": 0.1589544117450714, "rewards/frontier_coverage_20": 0.1589544117450714, "rewards/frontier_coverage_25": 0.1589544117450714, "rewards/frontier_coverage_5": 0.1589544117450714, "rewards/frontier_ece_reward": 0.01803905926644802, "signal/accuracy_reward/centered_abs_mean": 0.0984130859375, "signal/accuracy_reward/group_std_mean": 0.1334820196032524, "signal/accuracy_reward/group_zero_std_frac": 0.596875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04920654296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04920654296875, "signal/advantage_abs_mean": 0.06783413365483285, "signal/advantage_pre_scale_abs_mean": 0.06783413365483285, "signal/advantage_pre_scale_std": 0.11692542880773545, "signal/advantage_std": 0.11692542880773545, "signal/brier_reward/centered_abs_mean": 0.15209992229938507, "signal/brier_reward/group_std_mean": 0.19859039783477783, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019012490287423134, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.019012490287423134, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.032923289388418195, "signal/confidence_uniqueness_reward/group_std_mean": 0.04358488842844963, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004115411173552274, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004115411173552274, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.002762135770171881, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003569593699648976, "signal/frontier_aurc_reward/group_std_mean": 0.005398597475141287, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.389572881744243e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.389572881744243e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17154284417629242, "signal/frontier_coverage_1/group_std_mean": 0.22746075689792633, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030706167686730622, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030706167686730622, "signal/frontier_coverage_10/centered_abs_mean": 0.17154284417629242, "signal/frontier_coverage_10/group_std_mean": 0.22746075689792633, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030706167686730622, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030706167686730622, "signal/frontier_coverage_15/centered_abs_mean": 0.17154284417629242, "signal/frontier_coverage_15/group_std_mean": 0.22746075689792633, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030706167686730622, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030706167686730622, "signal/frontier_coverage_20/centered_abs_mean": 0.17154284417629242, "signal/frontier_coverage_20/group_std_mean": 0.22746075689792633, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030706167686730622, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030706167686730622, "signal/frontier_coverage_25/centered_abs_mean": 0.17154284417629242, "signal/frontier_coverage_25/group_std_mean": 0.22746075689792633, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030706167686730622, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030706167686730622, "signal/frontier_coverage_5/centered_abs_mean": 0.17154284417629242, "signal/frontier_coverage_5/group_std_mean": 0.22746075689792633, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030706167686730622, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030706167686730622, "signal/frontier_ece_reward/centered_abs_mean": 0.023829102888703345, "signal/frontier_ece_reward/group_std_mean": 0.030207440629601477, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002978637861087918, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002978637861087918, "step": 115 }, { "calibration/aurc": 0.34978752891207854, "calibration/batch_distribution_entropy": 0.8743274420308669, "calibration/buffer_distribution_entropy": 0.9651506073276552, "calibration/confidence_entropy": 0.3524804429751578, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.15099070450097846, "calibration/coverage@15%": 0.22602433953033269, "calibration/coverage@20%": 0.2737088735322896, "calibration/coverage@25%": 0.32334500366927593, "calibration/coverage@30%": 0.3589125183463796, "calibration/coverage@5%": 0.03287671232876712, "calibration/ece": 0.15381430206719035, "calibration/mean_confidence": 0.4920425182661553, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 913.8, "completions/max_terminated_length": 489.8, "completions/mean_length": 172.56142578125, "completions/mean_terminated_length": 172.16313781738282, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 0.384, "grad_norm": 0.01273356843739748, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 400774540.0, "reward": 1.024810528755188, "reward_std": 0.08978459835052491, "rewards/accuracy_reward": 0.57666015625, "rewards/brier_reward": 0.8028544902801513, "rewards/confidence_uniqueness_reward": 0.9432775259017945, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0032439586240798233, "rewards/frontier_coverage_1": 0.15212270766496658, "rewards/frontier_coverage_10": 0.15212270766496658, "rewards/frontier_coverage_15": 0.15212270766496658, "rewards/frontier_coverage_20": 0.15212270766496658, "rewards/frontier_coverage_25": 0.15212270766496658, "rewards/frontier_coverage_5": 0.15212270766496658, "rewards/frontier_ece_reward": 0.017425185441970824, "signal/accuracy_reward/centered_abs_mean": 0.106951904296875, "signal/accuracy_reward/group_std_mean": 0.1465958684682846, "signal/accuracy_reward/group_zero_std_frac": 0.5625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0534759521484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0534759521484375, "signal/advantage_abs_mean": 0.06634962484240532, "signal/advantage_pre_scale_abs_mean": 0.06634962484240532, "signal/advantage_pre_scale_std": 0.11549568325281143, "signal/advantage_std": 0.11549568325281143, "signal/brier_reward/centered_abs_mean": 0.14517641365528106, "signal/brier_reward/group_std_mean": 0.18895911276340485, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018147051706910132, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018147051706910132, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03095161318778992, "signal/confidence_uniqueness_reward/group_std_mean": 0.04060640558600426, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00386895164847374, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00386895164847374, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003309731697663665, "signal/frontier_aurc_reward/group_std_mean": 0.00533204497769475, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.9244197473162785e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.9244197473162785e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17565982341766356, "signal/frontier_coverage_1/group_std_mean": 0.23116243183612822, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031443107407540085, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031443107407540085, "signal/frontier_coverage_10/centered_abs_mean": 0.17565982341766356, "signal/frontier_coverage_10/group_std_mean": 0.23116243183612822, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031443107407540085, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031443107407540085, "signal/frontier_coverage_15/centered_abs_mean": 0.17565982341766356, "signal/frontier_coverage_15/group_std_mean": 0.23116243183612822, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031443107407540085, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031443107407540085, "signal/frontier_coverage_20/centered_abs_mean": 0.17565982341766356, "signal/frontier_coverage_20/group_std_mean": 0.23116243183612822, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031443107407540085, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031443107407540085, "signal/frontier_coverage_25/centered_abs_mean": 0.17565982341766356, "signal/frontier_coverage_25/group_std_mean": 0.23116243183612822, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031443107407540085, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031443107407540085, "signal/frontier_coverage_5/centered_abs_mean": 0.17565982341766356, "signal/frontier_coverage_5/group_std_mean": 0.23116243183612822, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031443107407540085, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031443107407540085, "signal/frontier_ece_reward/centered_abs_mean": 0.01915326751768589, "signal/frontier_ece_reward/group_std_mean": 0.024320138990879057, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002394158439710736, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002394158439710736, "step": 120 }, { "calibration/aurc": 0.4435299753075933, "calibration/batch_distribution_entropy": 0.9099935259372789, "calibration/buffer_distribution_entropy": 0.9669285677408318, "calibration/confidence_entropy": 0.3907481024090136, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.018003913894324854, "calibration/coverage@25%": 0.04261328889432485, "calibration/coverage@30%": 0.11968107876712328, "calibration/coverage@5%": 0.0, "calibration/ece": 0.22419826224495804, "calibration/mean_confidence": 0.545322742176537, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 634.4, "completions/max_terminated_length": 409.4, "completions/mean_length": 170.94443359375, "completions/mean_terminated_length": 170.81153564453126, "completions/min_length": 85.8, "completions/min_terminated_length": 85.8, "epoch": 0.4, "grad_norm": 0.0018827987369149923, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 417561459.0, "reward": 1.020148515701294, "reward_std": 0.09094813764095307, "rewards/accuracy_reward": 0.572265625, "rewards/brier_reward": 0.7909515976905823, "rewards/confidence_uniqueness_reward": 0.9425244092941284, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.004015334136784077, "rewards/frontier_coverage_1": 0.14759057611227036, "rewards/frontier_coverage_10": 0.14759057611227036, "rewards/frontier_coverage_15": 0.14759057611227036, "rewards/frontier_coverage_20": 0.14759057611227036, "rewards/frontier_coverage_25": 0.14238842576742172, "rewards/frontier_coverage_5": 0.14759057611227036, "rewards/frontier_ece_reward": 0.01355019873008132, "signal/accuracy_reward/centered_abs_mean": 0.1117431640625, "signal/accuracy_reward/group_std_mean": 0.15235530138015746, "signal/accuracy_reward/group_zero_std_frac": 0.54375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05587158203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05587158203125, "signal/advantage_abs_mean": 0.06790433377027512, "signal/advantage_pre_scale_abs_mean": 0.06790433377027512, "signal/advantage_pre_scale_std": 0.1179421067237854, "signal/advantage_std": 0.1179421067237854, "signal/brier_reward/centered_abs_mean": 0.15299761891365052, "signal/brier_reward/group_std_mean": 0.19623776376247407, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019124702364206315, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.019124702364206315, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03119489848613739, "signal/confidence_uniqueness_reward/group_std_mean": 0.03921703845262527, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038993623107671737, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038993623107671737, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.004192573670297861, "signal/frontier_aurc_reward/group_std_mean": 0.006507566943764686, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.504706663894468e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.504706663894468e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17459246814250945, "signal/frontier_coverage_1/group_std_mean": 0.22909881174564362, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031252051237970592, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031252051237970592, "signal/frontier_coverage_10/centered_abs_mean": 0.17459246814250945, "signal/frontier_coverage_10/group_std_mean": 0.22909881174564362, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031252051237970592, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031252051237970592, "signal/frontier_coverage_15/centered_abs_mean": 0.17459246814250945, "signal/frontier_coverage_15/group_std_mean": 0.22909881174564362, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031252051237970592, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031252051237970592, "signal/frontier_coverage_20/centered_abs_mean": 0.17459246814250945, "signal/frontier_coverage_20/group_std_mean": 0.22909881174564362, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031252051237970592, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031252051237970592, "signal/frontier_coverage_25/centered_abs_mean": 0.1689872920513153, "signal/frontier_coverage_25/group_std_mean": 0.22179524898529052, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030248723924160004, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030248723924160004, "signal/frontier_coverage_5/centered_abs_mean": 0.17459246814250945, "signal/frontier_coverage_5/group_std_mean": 0.22909881174564362, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031252051237970592, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031252051237970592, "signal/frontier_ece_reward/centered_abs_mean": 0.018266384676098823, "signal/frontier_ece_reward/group_std_mean": 0.0225957952439785, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002283298084512353, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002283298084512353, "step": 125 }, { "calibration/aurc": 0.32036860924247845, "calibration/batch_distribution_entropy": 0.9040425655296254, "calibration/buffer_distribution_entropy": 0.9658882837152956, "calibration/confidence_entropy": 0.3965625761816952, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.026171875, "calibration/coverage@15%": 0.05859375, "calibration/coverage@20%": 0.123828125, "calibration/coverage@25%": 0.226171875, "calibration/coverage@30%": 0.46484375, "calibration/coverage@5%": 0.0, "calibration/ece": 0.16183175553137435, "calibration/mean_confidence": 0.5771191877791881, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 634.0, "completions/max_terminated_length": 412.8, "completions/mean_length": 172.9611328125, "completions/mean_terminated_length": 172.82837829589843, "completions/min_length": 85.4, "completions/min_terminated_length": 85.4, "epoch": 0.416, "grad_norm": 0.0024297665804624557, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 434213765.0, "reward": 1.0161998510360717, "reward_std": 0.09269836395978928, "rewards/accuracy_reward": 0.56416015625, "rewards/brier_reward": 0.7885978817939758, "rewards/confidence_uniqueness_reward": 0.9452348351478577, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0034807201474905012, "rewards/frontier_coverage_1": 0.1499410331249237, "rewards/frontier_coverage_10": 0.1499410331249237, "rewards/frontier_coverage_15": 0.1499410331249237, "rewards/frontier_coverage_20": 0.1499410331249237, "rewards/frontier_coverage_25": 0.1424618661403656, "rewards/frontier_coverage_5": 0.1499410331249237, "rewards/frontier_ece_reward": 0.012646915204823018, "signal/accuracy_reward/centered_abs_mean": 0.114910888671875, "signal/accuracy_reward/group_std_mean": 0.15072711706161498, "signal/accuracy_reward/group_zero_std_frac": 0.571875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0574554443359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0574554443359375, "signal/advantage_abs_mean": 0.07128219231963158, "signal/advantage_pre_scale_abs_mean": 0.07128219231963158, "signal/advantage_pre_scale_std": 0.11946070045232773, "signal/advantage_std": 0.11946070045232773, "signal/brier_reward/centered_abs_mean": 0.16007616817951204, "signal/brier_reward/group_std_mean": 0.20323581397533416, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020009521022439004, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020009521022439004, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02985215187072754, "signal/confidence_uniqueness_reward/group_std_mean": 0.03883992582559585, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037315189838409424, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037315189838409424, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0035416937433183195, "signal/frontier_aurc_reward/group_std_mean": 0.005710090417414904, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.339631509035825e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.339631509035825e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18811068534851075, "signal/frontier_coverage_1/group_std_mean": 0.24402026534080506, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003367181122303009, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003367181122303009, "signal/frontier_coverage_10/centered_abs_mean": 0.18811068534851075, "signal/frontier_coverage_10/group_std_mean": 0.24402026534080506, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003367181122303009, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003367181122303009, "signal/frontier_coverage_15/centered_abs_mean": 0.18811068534851075, "signal/frontier_coverage_15/group_std_mean": 0.24402026534080506, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003367181122303009, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003367181122303009, "signal/frontier_coverage_20/centered_abs_mean": 0.18811068534851075, "signal/frontier_coverage_20/group_std_mean": 0.24402026534080506, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003367181122303009, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003367181122303009, "signal/frontier_coverage_25/centered_abs_mean": 0.175497430562973, "signal/frontier_coverage_25/group_std_mean": 0.22769122421741486, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003141403943300247, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003141403943300247, "signal/frontier_coverage_5/centered_abs_mean": 0.18811068534851075, "signal/frontier_coverage_5/group_std_mean": 0.24402026534080506, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003367181122303009, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003367181122303009, "signal/frontier_ece_reward/centered_abs_mean": 0.016132255643606187, "signal/frontier_ece_reward/group_std_mean": 0.02017546221613884, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020165319554507734, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020165319554507734, "step": 130 }, { "calibration/aurc": 0.27492773853385566, "calibration/batch_distribution_entropy": 0.8936948367188059, "calibration/buffer_distribution_entropy": 0.9619736028048184, "calibration/confidence_entropy": 0.374110594039602, "calibration/coverage@0%": 0.014453125, "calibration/coverage@1%": 0.014453125, "calibration/coverage@10%": 0.11875, "calibration/coverage@15%": 0.22734375, "calibration/coverage@20%": 0.333203125, "calibration/coverage@25%": 0.4421875, "calibration/coverage@30%": 0.512109375, "calibration/coverage@5%": 0.058984375, "calibration/ece": 0.13765157935061254, "calibration/mean_confidence": 0.5749263041564635, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 627.4, "completions/max_terminated_length": 393.2, "completions/mean_length": 172.41337890625, "completions/mean_terminated_length": 172.28014221191407, "completions/min_length": 88.2, "completions/min_terminated_length": 88.2, "epoch": 0.432, "grad_norm": 0.014152178540825844, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 450993614.0, "reward": 1.034668791294098, "reward_std": 0.08032770156860351, "rewards/accuracy_reward": 0.595703125, "rewards/brier_reward": 0.81131010055542, "rewards/confidence_uniqueness_reward": 0.9441461324691772, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0028154389234259724, "rewards/frontier_coverage_1": 0.14911295846104622, "rewards/frontier_coverage_10": 0.14911295846104622, "rewards/frontier_coverage_15": 0.14911295846104622, "rewards/frontier_coverage_20": 0.14911295846104622, "rewards/frontier_coverage_25": 0.1374576583504677, "rewards/frontier_coverage_5": 0.14911295846104622, "rewards/frontier_ece_reward": 0.013817432709038258, "signal/accuracy_reward/centered_abs_mean": 0.1028564453125, "signal/accuracy_reward/group_std_mean": 0.1392101302742958, "signal/accuracy_reward/group_zero_std_frac": 0.5875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05142822265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05142822265625, "signal/advantage_abs_mean": 0.06035290732979774, "signal/advantage_pre_scale_abs_mean": 0.06035290732979774, "signal/advantage_pre_scale_std": 0.10709730833768845, "signal/advantage_std": 0.10709730833768845, "signal/brier_reward/centered_abs_mean": 0.1353680819272995, "signal/brier_reward/group_std_mean": 0.17475684881210327, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016921010240912436, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016921010240912436, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030345895141363145, "signal/confidence_uniqueness_reward/group_std_mean": 0.03937292844057083, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003793236892670393, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003793236892670393, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028929989319294693, "signal/frontier_aurc_reward/group_std_mean": 0.00475033214315772, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.178467981750146e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.178467981750146e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17074471116065978, "signal/frontier_coverage_1/group_std_mean": 0.2249886155128479, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030563301406800747, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030563301406800747, "signal/frontier_coverage_10/centered_abs_mean": 0.17074471116065978, "signal/frontier_coverage_10/group_std_mean": 0.2249886155128479, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030563301406800747, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030563301406800747, "signal/frontier_coverage_15/centered_abs_mean": 0.17074471116065978, "signal/frontier_coverage_15/group_std_mean": 0.2249886155128479, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030563301406800747, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030563301406800747, "signal/frontier_coverage_20/centered_abs_mean": 0.17074471116065978, "signal/frontier_coverage_20/group_std_mean": 0.2249886155128479, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030563301406800747, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030563301406800747, "signal/frontier_coverage_25/centered_abs_mean": 0.15415639579296112, "signal/frontier_coverage_25/group_std_mean": 0.20353280007839203, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002759399451315403, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002759399451315403, "signal/frontier_coverage_5/centered_abs_mean": 0.17074471116065978, "signal/frontier_coverage_5/group_std_mean": 0.2249886155128479, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030563301406800747, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030563301406800747, "signal/frontier_ece_reward/centered_abs_mean": 0.013533397577702999, "signal/frontier_ece_reward/group_std_mean": 0.017023883387446405, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016916746972128749, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016916746972128749, "step": 135 }, { "calibration/aurc": 0.28909308310100607, "calibration/batch_distribution_entropy": 0.9344033893289716, "calibration/buffer_distribution_entropy": 0.9556897831631528, "calibration/confidence_entropy": 0.4277412313639977, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.05859375, "calibration/coverage@15%": 0.141796875, "calibration/coverage@20%": 0.204296875, "calibration/coverage@25%": 0.253125, "calibration/coverage@30%": 0.57734375, "calibration/coverage@5%": 0.0, "calibration/ece": 0.1422481416663646, "calibration/mean_confidence": 0.5711865293477547, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 609.8, "completions/max_terminated_length": 383.2, "completions/mean_length": 179.7435546875, "completions/mean_terminated_length": 179.6114044189453, "completions/min_length": 70.6, "completions/min_terminated_length": 70.6, "epoch": 0.448, "grad_norm": 0.009844657965004444, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 467786988.0, "reward": 1.0257550001144409, "reward_std": 0.08076644837856292, "rewards/accuracy_reward": 0.57421875, "rewards/brier_reward": 0.8085735321044922, "rewards/confidence_uniqueness_reward": 0.9484065532684326, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0029993959702551364, "rewards/frontier_coverage_1": 0.16901236772537231, "rewards/frontier_coverage_10": 0.16901236772537231, "rewards/frontier_coverage_15": 0.16901236772537231, "rewards/frontier_coverage_20": 0.16901236772537231, "rewards/frontier_coverage_25": 0.16056638807058335, "rewards/frontier_coverage_5": 0.16901236772537231, "rewards/frontier_ece_reward": 0.011342884600162506, "signal/accuracy_reward/centered_abs_mean": 0.09781494140625, "signal/accuracy_reward/group_std_mean": 0.13312481343746185, "signal/accuracy_reward/group_zero_std_frac": 0.6, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.048907470703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.048907470703125, "signal/advantage_abs_mean": 0.0605968214571476, "signal/advantage_pre_scale_abs_mean": 0.0605968214571476, "signal/advantage_pre_scale_std": 0.10682090073823929, "signal/advantage_std": 0.10682090073823929, "signal/brier_reward/centered_abs_mean": 0.1433545708656311, "signal/brier_reward/group_std_mean": 0.1853357344865799, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01791932135820389, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01791932135820389, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0269392192363739, "signal/confidence_uniqueness_reward/group_std_mean": 0.03449588306248188, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033674024045467375, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033674024045467375, "signal/format_reward/centered_abs_mean": 0.001068115234375, "signal/format_reward/group_std_mean": 0.0013125419616699218, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005340576171875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0005340576171875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002783289086073637, "signal/frontier_aurc_reward/group_std_mean": 0.004721877304837108, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.982087411917746e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.982087411917746e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18696335852146148, "signal/frontier_coverage_1/group_std_mean": 0.24190506637096404, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003346643876284361, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003346643876284361, "signal/frontier_coverage_10/centered_abs_mean": 0.18696335852146148, "signal/frontier_coverage_10/group_std_mean": 0.24190506637096404, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003346643876284361, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003346643876284361, "signal/frontier_coverage_15/centered_abs_mean": 0.18696335852146148, "signal/frontier_coverage_15/group_std_mean": 0.24190506637096404, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003346643876284361, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003346643876284361, "signal/frontier_coverage_20/centered_abs_mean": 0.18696335852146148, "signal/frontier_coverage_20/group_std_mean": 0.24190506637096404, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003346643876284361, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003346643876284361, "signal/frontier_coverage_25/centered_abs_mean": 0.1663324326276779, "signal/frontier_coverage_25/group_std_mean": 0.21630672812461854, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002977350587025285, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002977350587025285, "signal/frontier_coverage_5/centered_abs_mean": 0.18696335852146148, "signal/frontier_coverage_5/group_std_mean": 0.24190506637096404, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003346643876284361, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003346643876284361, "signal/frontier_ece_reward/centered_abs_mean": 0.01322302669286728, "signal/frontier_ece_reward/group_std_mean": 0.01661177948117256, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00165287833660841, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00165287833660841, "step": 140 }, { "calibration/aurc": 0.4461233259007119, "calibration/batch_distribution_entropy": 0.9497364830495236, "calibration/buffer_distribution_entropy": 0.9507200407112958, "calibration/confidence_entropy": 0.4329203666177947, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.009375, "calibration/coverage@20%": 0.03671875, "calibration/coverage@25%": 0.058203125, "calibration/coverage@30%": 0.220459271037182, "calibration/coverage@5%": 0.0, "calibration/ece": 0.154924640530329, "calibration/mean_confidence": 0.491454960609382, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 425.2, "completions/max_terminated_length": 425.2, "completions/mean_length": 185.42412109375, "completions/mean_terminated_length": 185.42412109375, "completions/min_length": 89.6, "completions/min_terminated_length": 89.6, "epoch": 0.464, "grad_norm": 0.001580472569912672, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 484856547.0, "reward": 1.0002707242965698, "reward_std": 0.07840840741991997, "rewards/accuracy_reward": 0.52705078125, "rewards/brier_reward": 0.7894474506378174, "rewards/confidence_uniqueness_reward": 0.946292269229889, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.00347891659475863, "rewards/frontier_coverage_1": 0.1787361979484558, "rewards/frontier_coverage_10": 0.1787361979484558, "rewards/frontier_coverage_15": 0.1787361979484558, "rewards/frontier_coverage_20": 0.1787361979484558, "rewards/frontier_coverage_25": 0.1631140410900116, "rewards/frontier_coverage_5": 0.1787361979484558, "rewards/frontier_ece_reward": 0.007778843771666289, "signal/accuracy_reward/centered_abs_mean": 0.095733642578125, "signal/accuracy_reward/group_std_mean": 0.12840082347393036, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0478668212890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0478668212890625, "signal/advantage_abs_mean": 0.059501688182353976, "signal/advantage_pre_scale_abs_mean": 0.059501688182353976, "signal/advantage_pre_scale_std": 0.10387323051691055, "signal/advantage_std": 0.10387323051691055, "signal/brier_reward/centered_abs_mean": 0.13797992914915086, "signal/brier_reward/group_std_mean": 0.17975262105464934, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017247491143643857, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017247491143643857, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02722937911748886, "signal/confidence_uniqueness_reward/group_std_mean": 0.03534681871533394, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034036723896861075, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034036723896861075, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025480398908257484, "signal/frontier_aurc_reward/group_std_mean": 0.004373569739982486, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.560991146718152e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.560991146718152e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17491473257541656, "signal/frontier_coverage_1/group_std_mean": 0.23137665688991546, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00313097364269197, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00313097364269197, "signal/frontier_coverage_10/centered_abs_mean": 0.17491473257541656, "signal/frontier_coverage_10/group_std_mean": 0.23137665688991546, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00313097364269197, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00313097364269197, "signal/frontier_coverage_15/centered_abs_mean": 0.17491473257541656, "signal/frontier_coverage_15/group_std_mean": 0.23137665688991546, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00313097364269197, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00313097364269197, "signal/frontier_coverage_20/centered_abs_mean": 0.17491473257541656, "signal/frontier_coverage_20/group_std_mean": 0.23137665688991546, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00313097364269197, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00313097364269197, "signal/frontier_coverage_25/centered_abs_mean": 0.15687368512153627, "signal/frontier_coverage_25/group_std_mean": 0.20789795517921447, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028080389834940433, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028080389834940433, "signal/frontier_coverage_5/centered_abs_mean": 0.17491473257541656, "signal/frontier_coverage_5/group_std_mean": 0.23137665688991546, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00313097364269197, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00313097364269197, "signal/frontier_ece_reward/centered_abs_mean": 0.012085023522377013, "signal/frontier_ece_reward/group_std_mean": 0.015317281149327755, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015106279402971267, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015106279402971267, "step": 145 }, { "calibration/aurc": 0.3023387085329794, "calibration/batch_distribution_entropy": 0.9264211093467802, "calibration/buffer_distribution_entropy": 0.9453633408800123, "calibration/confidence_entropy": 0.413005030259173, "calibration/coverage@0%": 0.015258836839530332, "calibration/coverage@1%": 0.015258836839530332, "calibration/coverage@10%": 0.06418251590019569, "calibration/coverage@15%": 0.08179504036203522, "calibration/coverage@20%": 0.28142581947162426, "calibration/coverage@25%": 0.3337955601761252, "calibration/coverage@30%": 0.49436766144814087, "calibration/coverage@5%": 0.031697193003913895, "calibration/ece": 0.15141720566899758, "calibration/mean_confidence": 0.5136556092663461, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 732.4, "completions/max_terminated_length": 501.2, "completions/mean_length": 187.25947265625, "completions/mean_terminated_length": 186.99586791992186, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.48, "grad_norm": 0.0019054191652685404, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 501822116.0, "reward": 1.0201605319976808, "reward_std": 0.08553178757429122, "rewards/accuracy_reward": 0.5736328125, "rewards/brier_reward": 0.7931627750396728, "rewards/confidence_uniqueness_reward": 0.9459454536437988, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0027874172665178776, "rewards/frontier_coverage_1": 0.14067478179931642, "rewards/frontier_coverage_10": 0.14067478179931642, "rewards/frontier_coverage_15": 0.14067478179931642, "rewards/frontier_coverage_20": 0.14067478179931642, "rewards/frontier_coverage_25": 0.13163903802633287, "rewards/frontier_coverage_5": 0.14067478179931642, "rewards/frontier_ece_reward": 0.009641882218420505, "signal/accuracy_reward/centered_abs_mean": 0.12015380859375, "signal/accuracy_reward/group_std_mean": 0.15791453421115875, "signal/accuracy_reward/group_zero_std_frac": 0.553125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.060076904296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.060076904296875, "signal/advantage_abs_mean": 0.06460028663277625, "signal/advantage_pre_scale_abs_mean": 0.06460028663277625, "signal/advantage_pre_scale_std": 0.11053272932767869, "signal/advantage_std": 0.11053272932767869, "signal/brier_reward/centered_abs_mean": 0.14295845627784728, "signal/brier_reward/group_std_mean": 0.1843875139951706, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01786980703473091, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01786980703473091, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027392278984189035, "signal/confidence_uniqueness_reward/group_std_mean": 0.03549604080617428, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034240348730236294, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034240348730236294, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022343342658132314, "signal/frontier_aurc_reward/group_std_mean": 0.0036009853240102528, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.999458203907125e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.999458203907125e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19157818257808684, "signal/frontier_coverage_1/group_std_mean": 0.24868603348731994, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034292493481189014, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034292493481189014, "signal/frontier_coverage_10/centered_abs_mean": 0.19157818257808684, "signal/frontier_coverage_10/group_std_mean": 0.24868603348731994, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034292493481189014, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034292493481189014, "signal/frontier_coverage_15/centered_abs_mean": 0.19157818257808684, "signal/frontier_coverage_15/group_std_mean": 0.24868603348731994, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034292493481189014, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034292493481189014, "signal/frontier_coverage_20/centered_abs_mean": 0.19157818257808684, "signal/frontier_coverage_20/group_std_mean": 0.24868603348731994, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034292493481189014, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034292493481189014, "signal/frontier_coverage_25/centered_abs_mean": 0.17257940769195557, "signal/frontier_coverage_25/group_std_mean": 0.2242441803216934, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003089171182364225, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003089171182364225, "signal/frontier_coverage_5/centered_abs_mean": 0.19157818257808684, "signal/frontier_coverage_5/group_std_mean": 0.24868603348731994, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034292493481189014, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034292493481189014, "signal/frontier_ece_reward/centered_abs_mean": 0.012200168147683144, "signal/frontier_ece_reward/group_std_mean": 0.015492185577750206, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001525021018460393, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001525021018460393, "step": 150 }, { "epoch": 0.48, "eval_calibration/aurc": 0.46974372773433737, "eval_calibration/batch_distribution_entropy": 0.8729489041595544, "eval_calibration/buffer_distribution_entropy": 0.9417117821904345, "eval_calibration/confidence_entropy": 0.4194894160323942, "eval_calibration/coverage@0%": 0.0390625, "eval_calibration/coverage@1%": 0.0390625, "eval_calibration/coverage@10%": 0.0390625, "eval_calibration/coverage@15%": 0.0390625, "eval_calibration/coverage@20%": 0.15625, "eval_calibration/coverage@25%": 0.1640625, "eval_calibration/coverage@30%": 0.1953125, "eval_calibration/coverage@5%": 0.0390625, "eval_calibration/ece": 0.1833765625, "eval_calibration/mean_confidence": 0.49400156249999994, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 406.25, "eval_completions/max_terminated_length": 406.25, "eval_completions/mean_length": 190.00639724731445, "eval_completions/mean_terminated_length": 190.00639724731445, "eval_completions/min_length": 105.5, "eval_completions/min_terminated_length": 105.5, "eval_loss": 0.0, "eval_num_tokens": 501822116.0, "eval_reward": 0.9429960995912552, "eval_reward_std": 0.22927699238061905, "eval_rewards/accuracy_reward": 0.416015625, "eval_rewards/brier_reward": 0.7913370132446289, "eval_rewards/confidence_uniqueness_reward": 0.894775390625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0035256121191196144, "eval_rewards/frontier_coverage_1": 0.21800321713089943, "eval_rewards/frontier_coverage_10": 0.21800321713089943, "eval_rewards/frontier_coverage_15": 0.21800321713089943, "eval_rewards/frontier_coverage_20": 0.21800321713089943, "eval_rewards/frontier_coverage_25": 0.1946805864572525, "eval_rewards/frontier_coverage_5": 0.21800321713089943, "eval_rewards/frontier_ece_reward": 0.010330205783247948, "eval_runtime": 20.2833, "eval_samples_per_second": 24.651, "eval_signal/accuracy_reward/centered_abs_mean": 0.4698486328125, "eval_signal/accuracy_reward/group_std_mean": 0.4919331818819046, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23492431640625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23492431640625, "eval_signal/advantage_abs_mean": 0.20919283106923103, "eval_signal/advantage_pre_scale_abs_mean": 0.20919283106923103, "eval_signal/advantage_pre_scale_std": 0.22680530324578285, "eval_signal/advantage_std": 0.22680530324578285, "eval_signal/brier_reward/centered_abs_mean": 0.21989374607801437, "eval_signal/brier_reward/group_std_mean": 0.27512865513563156, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027486718259751797, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.027486718259751797, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.046142578125, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.054415556602180004, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005767822265625, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005767822265625, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0042980361031368375, "eval_signal/frontier_aurc_reward/group_std_mean": 0.00823443685658276, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.693484076298773e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.693484076298773e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.36622023582458496, "eval_signal/frontier_coverage_1/group_std_mean": 0.45468851178884506, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0065553419990465045, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0065553419990465045, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.36622023582458496, "eval_signal/frontier_coverage_10/group_std_mean": 0.45468851178884506, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0065553419990465045, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0065553419990465045, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.36622023582458496, "eval_signal/frontier_coverage_15/group_std_mean": 0.45468851178884506, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0065553419990465045, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0065553419990465045, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.36622023582458496, "eval_signal/frontier_coverage_20/group_std_mean": 0.45468851178884506, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0065553419990465045, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0065553419990465045, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.3283776342868805, "eval_signal/frontier_coverage_25/group_std_mean": 0.40934164822101593, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0058779597748070955, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0058779597748070955, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.36622023582458496, "eval_signal/frontier_coverage_5/group_std_mean": 0.45468851178884506, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0065553419990465045, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0065553419990465045, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.018566378857940435, "eval_signal/frontier_ece_reward/group_std_mean": 0.02370068058371544, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023207973572425544, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023207973572425544, "eval_steps_per_second": 0.197, "step": 150 }, { "calibration/aurc": 0.3998863695118646, "calibration/batch_distribution_entropy": 0.9386218344736872, "calibration/buffer_distribution_entropy": 0.9403166197438353, "calibration/confidence_entropy": 0.4160008952255986, "calibration/coverage@0%": 0.002734375, "calibration/coverage@1%": 0.002734375, "calibration/coverage@10%": 0.11640625, "calibration/coverage@15%": 0.16171875, "calibration/coverage@20%": 0.180859375, "calibration/coverage@25%": 0.23671875, "calibration/coverage@30%": 0.27578125, "calibration/coverage@5%": 0.0515625, "calibration/ece": 0.16494032522185803, "calibration/mean_confidence": 0.5358418390577956, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.2, "completions/max_terminated_length": 471.2, "completions/mean_length": 191.12421875, "completions/mean_terminated_length": 191.12421875, "completions/min_length": 94.6, "completions/min_terminated_length": 94.6, "epoch": 0.496, "grad_norm": 0.0010813616681843996, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 519087068.0, "reward": 1.0434123754501343, "reward_std": 0.07760989367961883, "rewards/accuracy_reward": 0.61640625, "rewards/brier_reward": 0.8119624257087708, "rewards/confidence_uniqueness_reward": 0.9533378601074218, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0029467219952493905, "rewards/frontier_coverage_1": 0.1270418345928192, "rewards/frontier_coverage_10": 0.1270418345928192, "rewards/frontier_coverage_15": 0.1270418345928192, "rewards/frontier_coverage_20": 0.1270418345928192, "rewards/frontier_coverage_25": 0.10793070495128632, "rewards/frontier_coverage_5": 0.1270418345928192, "rewards/frontier_ece_reward": 0.010378126800060273, "signal/accuracy_reward/centered_abs_mean": 0.0924072265625, "signal/accuracy_reward/group_std_mean": 0.1291539266705513, "signal/accuracy_reward/group_zero_std_frac": 0.603125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04620361328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04620361328125, "signal/advantage_abs_mean": 0.05744131505489349, "signal/advantage_pre_scale_abs_mean": 0.05744131505489349, "signal/advantage_pre_scale_std": 0.10349351465702057, "signal/advantage_std": 0.10349351465702057, "signal/brier_reward/centered_abs_mean": 0.13018125295639038, "signal/brier_reward/group_std_mean": 0.1682298392057419, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016272656619548798, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016272656619548798, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0224942684173584, "signal/confidence_uniqueness_reward/group_std_mean": 0.02873026542365551, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028117835521698, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028117835521698, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027974717784672977, "signal/frontier_aurc_reward/group_std_mean": 0.004662458691745997, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.0074743921868506e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.0074743921868506e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1607126325368881, "signal/frontier_coverage_1/group_std_mean": 0.2118363171815872, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002876756014302373, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002876756014302373, "signal/frontier_coverage_10/centered_abs_mean": 0.1607126325368881, "signal/frontier_coverage_10/group_std_mean": 0.2118363171815872, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002876756014302373, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002876756014302373, "signal/frontier_coverage_15/centered_abs_mean": 0.1607126325368881, "signal/frontier_coverage_15/group_std_mean": 0.2118363171815872, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002876756014302373, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002876756014302373, "signal/frontier_coverage_20/centered_abs_mean": 0.1607126325368881, "signal/frontier_coverage_20/group_std_mean": 0.2118363171815872, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002876756014302373, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002876756014302373, "signal/frontier_coverage_25/centered_abs_mean": 0.145123627781868, "signal/frontier_coverage_25/group_std_mean": 0.19127892851829528, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002597712818533182, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002597712818533182, "signal/frontier_coverage_5/centered_abs_mean": 0.1607126325368881, "signal/frontier_coverage_5/group_std_mean": 0.2118363171815872, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002876756014302373, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002876756014302373, "signal/frontier_ece_reward/centered_abs_mean": 0.012259660847485065, "signal/frontier_ece_reward/group_std_mean": 0.015376238338649272, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015324576059356331, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015324576059356331, "step": 155 }, { "calibration/aurc": 0.30546842444616046, "calibration/batch_distribution_entropy": 0.9425257723813347, "calibration/buffer_distribution_entropy": 0.9392028630333185, "calibration/confidence_entropy": 0.43838925650671695, "calibration/coverage@0%": 0.01953125, "calibration/coverage@1%": 0.01953125, "calibration/coverage@10%": 0.15625, "calibration/coverage@15%": 0.301953125, "calibration/coverage@20%": 0.3921875, "calibration/coverage@25%": 0.45546875, "calibration/coverage@30%": 0.48984375, "calibration/coverage@5%": 0.062890625, "calibration/ece": 0.14053552377202888, "calibration/mean_confidence": 0.5392683375418532, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 418.6, "completions/max_terminated_length": 418.6, "completions/mean_length": 186.12705078125, "completions/mean_terminated_length": 186.12705078125, "completions/min_length": 90.6, "completions/min_terminated_length": 90.6, "epoch": 0.512, "grad_norm": 0.0013270304771140218, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 536138673.0, "reward": 1.047600269317627, "reward_std": 0.0819821760058403, "rewards/accuracy_reward": 0.62021484375, "rewards/brier_reward": 0.819976532459259, "rewards/confidence_uniqueness_reward": 0.9540580749511719, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002464716648682952, "rewards/frontier_coverage_1": 0.13561659753322602, "rewards/frontier_coverage_10": 0.13561659753322602, "rewards/frontier_coverage_15": 0.13561659753322602, "rewards/frontier_coverage_20": 0.13561659753322602, "rewards/frontier_coverage_25": 0.122788804769516, "rewards/frontier_coverage_5": 0.13561659753322602, "rewards/frontier_ece_reward": 0.011576398648321629, "signal/accuracy_reward/centered_abs_mean": 0.103265380859375, "signal/accuracy_reward/group_std_mean": 0.13870376944541932, "signal/accuracy_reward/group_zero_std_frac": 0.59375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0516326904296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0516326904296875, "signal/advantage_abs_mean": 0.06201394349336624, "signal/advantage_pre_scale_abs_mean": 0.06201394349336624, "signal/advantage_pre_scale_std": 0.11068142652511596, "signal/advantage_std": 0.11068142652511596, "signal/brier_reward/centered_abs_mean": 0.12418768852949143, "signal/brier_reward/group_std_mean": 0.16362954676151276, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015523461066186428, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015523461066186428, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021464061737060548, "signal/confidence_uniqueness_reward/group_std_mean": 0.027423058450222016, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026830077171325684, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026830077171325684, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023107901914045216, "signal/frontier_aurc_reward/group_std_mean": 0.003936678450554609, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.13631434639683e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.13631434639683e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1497887223958969, "signal/frontier_coverage_1/group_std_mean": 0.2013436108827591, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002681217947974801, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002681217947974801, "signal/frontier_coverage_10/centered_abs_mean": 0.1497887223958969, "signal/frontier_coverage_10/group_std_mean": 0.2013436108827591, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002681217947974801, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002681217947974801, "signal/frontier_coverage_15/centered_abs_mean": 0.1497887223958969, "signal/frontier_coverage_15/group_std_mean": 0.2013436108827591, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002681217947974801, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002681217947974801, "signal/frontier_coverage_20/centered_abs_mean": 0.1497887223958969, "signal/frontier_coverage_20/group_std_mean": 0.2013436108827591, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002681217947974801, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002681217947974801, "signal/frontier_coverage_25/centered_abs_mean": 0.13205797374248504, "signal/frontier_coverage_25/group_std_mean": 0.17783505022525786, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00236383774317801, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00236383774317801, "signal/frontier_coverage_5/centered_abs_mean": 0.1497887223958969, "signal/frontier_coverage_5/group_std_mean": 0.2013436108827591, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002681217947974801, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002681217947974801, "signal/frontier_ece_reward/centered_abs_mean": 0.011835797131061554, "signal/frontier_ece_reward/group_std_mean": 0.014990520663559437, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014794746413826943, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014794746413826943, "step": 160 }, { "calibration/aurc": 0.21724688671067058, "calibration/batch_distribution_entropy": 0.9290459239453405, "calibration/buffer_distribution_entropy": 0.9403927660747857, "calibration/confidence_entropy": 0.40619159910496216, "calibration/coverage@0%": 0.01796875, "calibration/coverage@1%": 0.01796875, "calibration/coverage@10%": 0.2698332008317025, "calibration/coverage@15%": 0.37193003913894324, "calibration/coverage@20%": 0.47208674779843446, "calibration/coverage@25%": 0.6163145181017613, "calibration/coverage@30%": 0.7265861974070449, "calibration/coverage@5%": 0.12551216976516635, "calibration/ece": 0.10725554418840492, "calibration/mean_confidence": 0.5539072649878911, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 876.8, "completions/max_terminated_length": 444.4, "completions/mean_length": 181.95234375, "completions/mean_terminated_length": 181.55501403808594, "completions/min_length": 88.4, "completions/min_terminated_length": 88.4, "epoch": 0.528, "grad_norm": 0.0014982522698119283, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 553031401.0, "reward": 1.035895085334778, "reward_std": 0.07947989255189895, "rewards/accuracy_reward": 0.59501953125, "rewards/brier_reward": 0.814744234085083, "rewards/confidence_uniqueness_reward": 0.9488541841506958, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.002320343186147511, "rewards/frontier_coverage_1": 0.15851781517267227, "rewards/frontier_coverage_10": 0.15851781517267227, "rewards/frontier_coverage_15": 0.15851781517267227, "rewards/frontier_coverage_20": 0.15851781517267227, "rewards/frontier_coverage_25": 0.14161890745162964, "rewards/frontier_coverage_5": 0.15851781517267227, "rewards/frontier_ece_reward": 0.011600286141037941, "signal/accuracy_reward/centered_abs_mean": 0.106195068359375, "signal/accuracy_reward/group_std_mean": 0.13894531279802322, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0530975341796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0530975341796875, "signal/advantage_abs_mean": 0.060419239848852155, "signal/advantage_pre_scale_abs_mean": 0.060419239848852155, "signal/advantage_pre_scale_std": 0.10889140963554382, "signal/advantage_std": 0.10889140963554382, "signal/brier_reward/centered_abs_mean": 0.12778010070323945, "signal/brier_reward/group_std_mean": 0.16365299820899964, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01597251258790493, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01597251258790493, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025607554242014885, "signal/confidence_uniqueness_reward/group_std_mean": 0.03395786285400391, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032009442802518606, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032009442802518606, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002263115392997861, "signal/frontier_aurc_reward/group_std_mean": 0.0036556614562869073, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.050976349390112e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.050976349390112e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16458451747894287, "signal/frontier_coverage_1/group_std_mean": 0.21477258801460267, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029460627119988203, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029460627119988203, "signal/frontier_coverage_10/centered_abs_mean": 0.16458451747894287, "signal/frontier_coverage_10/group_std_mean": 0.21477258801460267, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029460627119988203, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029460627119988203, "signal/frontier_coverage_15/centered_abs_mean": 0.16458451747894287, "signal/frontier_coverage_15/group_std_mean": 0.21477258801460267, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029460627119988203, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029460627119988203, "signal/frontier_coverage_20/centered_abs_mean": 0.16458451747894287, "signal/frontier_coverage_20/group_std_mean": 0.21477258801460267, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029460627119988203, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029460627119988203, "signal/frontier_coverage_25/centered_abs_mean": 0.14212436079978943, "signal/frontier_coverage_25/group_std_mean": 0.18566452860832214, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025440258905291557, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025440258905291557, "signal/frontier_coverage_5/centered_abs_mean": 0.16458451747894287, "signal/frontier_coverage_5/group_std_mean": 0.21477258801460267, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029460627119988203, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029460627119988203, "signal/frontier_ece_reward/centered_abs_mean": 0.010803556628525257, "signal/frontier_ece_reward/group_std_mean": 0.013579228706657887, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001350444578565657, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001350444578565657, "step": 165 }, { "calibration/aurc": 0.26676921280074456, "calibration/batch_distribution_entropy": 0.9071265737076237, "calibration/buffer_distribution_entropy": 0.9395798766963088, "calibration/confidence_entropy": 0.40087509719588776, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.10441153987279843, "calibration/coverage@15%": 0.21305497798434442, "calibration/coverage@20%": 0.30528299290606653, "calibration/coverage@25%": 0.49401372920743636, "calibration/coverage@30%": 0.7015235139432485, "calibration/coverage@5%": 0.0, "calibration/ece": 0.10255522742341461, "calibration/mean_confidence": 0.5823576404726291, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 631.0, "completions/max_terminated_length": 412.0, "completions/mean_length": 179.7796875, "completions/mean_terminated_length": 179.5145263671875, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.544, "grad_norm": 0.0018303662072867155, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 570035929.0, "reward": 1.0470038890838622, "reward_std": 0.08949521332979202, "rewards/accuracy_reward": 0.63212890625, "rewards/brier_reward": 0.8009482145309448, "rewards/confidence_uniqueness_reward": 0.9542343854904175, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0027931962627917527, "rewards/frontier_coverage_1": 0.10076590478420258, "rewards/frontier_coverage_10": 0.10076590478420258, "rewards/frontier_coverage_15": 0.10076590478420258, "rewards/frontier_coverage_20": 0.10076590478420258, "rewards/frontier_coverage_25": 0.08373739868402481, "rewards/frontier_coverage_5": 0.10076590478420258, "rewards/frontier_ece_reward": 0.00937446840107441, "signal/accuracy_reward/centered_abs_mean": 0.126141357421875, "signal/accuracy_reward/group_std_mean": 0.16540803015232086, "signal/accuracy_reward/group_zero_std_frac": 0.528125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0630706787109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0630706787109375, "signal/advantage_abs_mean": 0.06790957748889923, "signal/advantage_pre_scale_abs_mean": 0.06790957748889923, "signal/advantage_pre_scale_std": 0.11734311580657959, "signal/advantage_std": 0.11734311580657959, "signal/brier_reward/centered_abs_mean": 0.13875785171985627, "signal/brier_reward/group_std_mean": 0.1772547960281372, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017344731464982034, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017344731464982034, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02129780054092407, "signal/confidence_uniqueness_reward/group_std_mean": 0.02833399027585983, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002662225067615509, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002662225067615509, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002696292009204626, "signal/frontier_aurc_reward/group_std_mean": 0.004545783344656229, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8263624921673906e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8263624921673906e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17123860418796538, "signal/frontier_coverage_1/group_std_mean": 0.22258543372154235, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030651709996163843, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030651709996163843, "signal/frontier_coverage_10/centered_abs_mean": 0.17123860418796538, "signal/frontier_coverage_10/group_std_mean": 0.22258543372154235, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030651709996163843, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030651709996163843, "signal/frontier_coverage_15/centered_abs_mean": 0.17123860418796538, "signal/frontier_coverage_15/group_std_mean": 0.22258543372154235, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030651709996163843, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030651709996163843, "signal/frontier_coverage_20/centered_abs_mean": 0.17123860418796538, "signal/frontier_coverage_20/group_std_mean": 0.22258543372154235, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030651709996163843, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030651709996163843, "signal/frontier_coverage_25/centered_abs_mean": 0.13877653181552888, "signal/frontier_coverage_25/group_std_mean": 0.18102459311485292, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002484099706634879, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002484099706634879, "signal/frontier_coverage_5/centered_abs_mean": 0.17123860418796538, "signal/frontier_coverage_5/group_std_mean": 0.22258543372154235, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030651709996163843, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030651709996163843, "signal/frontier_ece_reward/centered_abs_mean": 0.011479491926729679, "signal/frontier_ece_reward/group_std_mean": 0.014317681267857551, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014349364908412098, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014349364908412098, "step": 170 }, { "calibration/aurc": 0.25630293714761593, "calibration/batch_distribution_entropy": 0.9494929939257506, "calibration/buffer_distribution_entropy": 0.9382286137261033, "calibration/confidence_entropy": 0.4323171410988912, "calibration/coverage@0%": 0.06328125, "calibration/coverage@1%": 0.0875, "calibration/coverage@10%": 0.21685267857142856, "calibration/coverage@15%": 0.2813272076810176, "calibration/coverage@20%": 0.40833460738747557, "calibration/coverage@25%": 0.48687851638943247, "calibration/coverage@30%": 0.5799000122309198, "calibration/coverage@5%": 0.18714377446183952, "calibration/ece": 0.11609540804760625, "calibration/mean_confidence": 0.5559859733345548, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 628.0, "completions/max_terminated_length": 406.6, "completions/mean_length": 174.68447265625, "completions/mean_terminated_length": 174.41849670410156, "completions/min_length": 85.6, "completions/min_terminated_length": 85.6, "epoch": 0.56, "grad_norm": 0.0008989165653474629, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 586646106.0, "reward": 1.0321044683456422, "reward_std": 0.07522638440132141, "rewards/accuracy_reward": 0.58759765625, "rewards/brier_reward": 0.8196055650711059, "rewards/confidence_uniqueness_reward": 0.9552822113037109, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002765678521245718, "rewards/frontier_coverage_1": 0.14798882305622102, "rewards/frontier_coverage_10": 0.14798882305622102, "rewards/frontier_coverage_15": 0.14798882305622102, "rewards/frontier_coverage_20": 0.14798882305622102, "rewards/frontier_coverage_25": 0.1166765883564949, "rewards/frontier_coverage_5": 0.14798882305622102, "rewards/frontier_ece_reward": 0.01006685383617878, "signal/accuracy_reward/centered_abs_mean": 0.087664794921875, "signal/accuracy_reward/group_std_mean": 0.12393931746482849, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0438323974609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0438323974609375, "signal/advantage_abs_mean": 0.0555981308221817, "signal/advantage_pre_scale_abs_mean": 0.0555981308221817, "signal/advantage_pre_scale_std": 0.10245826691389084, "signal/advantage_std": 0.10245826691389084, "signal/brier_reward/centered_abs_mean": 0.12557310312986375, "signal/brier_reward/group_std_mean": 0.16274870932102203, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01569663789123297, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01569663789123297, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0206695556640625, "signal/confidence_uniqueness_reward/group_std_mean": 0.027292505279183386, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025836944580078123, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025836944580078123, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027116264682263136, "signal/frontier_aurc_reward/group_std_mean": 0.004758490296080708, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.853811406064778e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.853811406064778e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15379790365695953, "signal/frontier_coverage_1/group_std_mean": 0.20061783492565155, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027529822662472727, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027529822662472727, "signal/frontier_coverage_10/centered_abs_mean": 0.15379790365695953, "signal/frontier_coverage_10/group_std_mean": 0.20061783492565155, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027529822662472727, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027529822662472727, "signal/frontier_coverage_15/centered_abs_mean": 0.15379790365695953, "signal/frontier_coverage_15/group_std_mean": 0.20061783492565155, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027529822662472727, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027529822662472727, "signal/frontier_coverage_20/centered_abs_mean": 0.15379790365695953, "signal/frontier_coverage_20/group_std_mean": 0.20061783492565155, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027529822662472727, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027529822662472727, "signal/frontier_coverage_25/centered_abs_mean": 0.11712785661220551, "signal/frontier_coverage_25/group_std_mean": 0.1535368263721466, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020965886767953636, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020965886767953636, "signal/frontier_coverage_5/centered_abs_mean": 0.15379790365695953, "signal/frontier_coverage_5/group_std_mean": 0.20061783492565155, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027529822662472727, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027529822662472727, "signal/frontier_ece_reward/centered_abs_mean": 0.01002585757523775, "signal/frontier_ece_reward/group_std_mean": 0.012720918469130992, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012532321969047188, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012532321969047188, "step": 175 }, { "calibration/aurc": 0.3079945286543727, "calibration/batch_distribution_entropy": 0.9393406726013804, "calibration/buffer_distribution_entropy": 0.9394180622540608, "calibration/confidence_entropy": 0.4314125793441579, "calibration/coverage@0%": 0.009375, "calibration/coverage@1%": 0.009375, "calibration/coverage@10%": 0.098046875, "calibration/coverage@15%": 0.155078125, "calibration/coverage@20%": 0.25546875, "calibration/coverage@25%": 0.36875, "calibration/coverage@30%": 0.491796875, "calibration/coverage@5%": 0.047265625, "calibration/ece": 0.10765465442729547, "calibration/mean_confidence": 0.5416941542047502, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 411.2, "completions/max_terminated_length": 411.2, "completions/mean_length": 171.85869140625, "completions/mean_terminated_length": 171.85869140625, "completions/min_length": 84.8, "completions/min_terminated_length": 84.8, "epoch": 0.576, "grad_norm": 0.004144130740314722, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 603592563.0, "reward": 1.0301400423049927, "reward_std": 0.06984256058931351, "rewards/accuracy_reward": 0.5873046875, "rewards/brier_reward": 0.8139339208602905, "rewards/confidence_uniqueness_reward": 0.9532180786132812, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0029315354768186808, "rewards/frontier_coverage_1": 0.14102224856615067, "rewards/frontier_coverage_10": 0.14102224856615067, "rewards/frontier_coverage_15": 0.14102224856615067, "rewards/frontier_coverage_20": 0.14102224856615067, "rewards/frontier_coverage_25": 0.11173355653882026, "rewards/frontier_coverage_5": 0.14102224856615067, "rewards/frontier_ece_reward": 0.008197224885225295, "signal/accuracy_reward/centered_abs_mean": 0.0781005859375, "signal/accuracy_reward/group_std_mean": 0.11179777681827545, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03905029296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03905029296875, "signal/advantage_abs_mean": 0.05057525113224983, "signal/advantage_pre_scale_abs_mean": 0.05057525113224983, "signal/advantage_pre_scale_std": 0.09479147344827651, "signal/advantage_std": 0.09479147344827651, "signal/brier_reward/centered_abs_mean": 0.12227167785167695, "signal/brier_reward/group_std_mean": 0.15898571908473969, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015283959731459618, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015283959731459618, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021122956275939943, "signal/confidence_uniqueness_reward/group_std_mean": 0.02684759609401226, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002640369534492493, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002640369534492493, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023652775678783657, "signal/frontier_aurc_reward/group_std_mean": 0.003888764465227723, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2338467756053434e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2338467756053434e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15091627687215806, "signal/frontier_coverage_1/group_std_mean": 0.19856328666210174, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002701401337981224, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002701401337981224, "signal/frontier_coverage_10/centered_abs_mean": 0.15091627687215806, "signal/frontier_coverage_10/group_std_mean": 0.19856328666210174, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002701401337981224, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002701401337981224, "signal/frontier_coverage_15/centered_abs_mean": 0.15091627687215806, "signal/frontier_coverage_15/group_std_mean": 0.19856328666210174, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002701401337981224, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002701401337981224, "signal/frontier_coverage_20/centered_abs_mean": 0.15091627687215806, "signal/frontier_coverage_20/group_std_mean": 0.19856328666210174, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002701401337981224, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002701401337981224, "signal/frontier_coverage_25/centered_abs_mean": 0.11474166065454483, "signal/frontier_coverage_25/group_std_mean": 0.15160171389579774, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020538756158202886, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020538756158202886, "signal/frontier_coverage_5/centered_abs_mean": 0.15091627687215806, "signal/frontier_coverage_5/group_std_mean": 0.19856328666210174, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002701401337981224, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002701401337981224, "signal/frontier_ece_reward/centered_abs_mean": 0.009535189159214497, "signal/frontier_ece_reward/group_std_mean": 0.012084404565393924, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011918986449018121, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011918986449018121, "step": 180 }, { "calibration/aurc": 0.30330525952000287, "calibration/batch_distribution_entropy": 0.9548014004939069, "calibration/buffer_distribution_entropy": 0.9393214771578714, "calibration/confidence_entropy": 0.4342403289114817, "calibration/coverage@0%": 0.032821673189823874, "calibration/coverage@1%": 0.032821673189823874, "calibration/coverage@10%": 0.15863961594911938, "calibration/coverage@15%": 0.29188937133072407, "calibration/coverage@20%": 0.40682943982387476, "calibration/coverage@25%": 0.5268101761252446, "calibration/coverage@30%": 0.5983304794520548, "calibration/coverage@5%": 0.06719667318982388, "calibration/ece": 0.11588005294478321, "calibration/mean_confidence": 0.5073022957419638, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 484.8, "completions/max_terminated_length": 484.8, "completions/mean_length": 170.01044921875, "completions/mean_terminated_length": 170.01044921875, "completions/min_length": 85.8, "completions/min_terminated_length": 85.8, "epoch": 0.592, "grad_norm": 0.0013855872675776482, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 620501182.0, "reward": 1.021399199962616, "reward_std": 0.07876999825239181, "rewards/accuracy_reward": 0.5703125, "rewards/brier_reward": 0.806274163722992, "rewards/confidence_uniqueness_reward": 0.9500226140022278, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0024875002447515724, "rewards/frontier_coverage_1": 0.15400602370500566, "rewards/frontier_coverage_10": 0.15400602370500566, "rewards/frontier_coverage_15": 0.15400602370500566, "rewards/frontier_coverage_20": 0.15400602370500566, "rewards/frontier_coverage_25": 0.11699056923389435, "rewards/frontier_coverage_5": 0.15400602370500566, "rewards/frontier_ece_reward": 0.008153815101832152, "signal/accuracy_reward/centered_abs_mean": 0.10953369140625, "signal/accuracy_reward/group_std_mean": 0.14035410881042482, "signal/accuracy_reward/group_zero_std_frac": 0.615625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.054766845703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.054766845703125, "signal/advantage_abs_mean": 0.06093080118298531, "signal/advantage_pre_scale_abs_mean": 0.06093080118298531, "signal/advantage_pre_scale_std": 0.10702161937952041, "signal/advantage_std": 0.10702161937952041, "signal/brier_reward/centered_abs_mean": 0.12534408420324325, "signal/brier_reward/group_std_mean": 0.1609453946352005, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015668010525405406, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015668010525405406, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023938726261258127, "signal/confidence_uniqueness_reward/group_std_mean": 0.030889422819018363, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002992340782657266, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002992340782657266, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020968030439689755, "signal/frontier_aurc_reward/group_std_mean": 0.003513322817161679, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7532773421844465e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7532773421844465e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17061007618904114, "signal/frontier_coverage_1/group_std_mean": 0.2188116878271103, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030539202969521286, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030539202969521286, "signal/frontier_coverage_10/centered_abs_mean": 0.17061007618904114, "signal/frontier_coverage_10/group_std_mean": 0.2188116878271103, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030539202969521286, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030539202969521286, "signal/frontier_coverage_15/centered_abs_mean": 0.17061007618904114, "signal/frontier_coverage_15/group_std_mean": 0.2188116878271103, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030539202969521286, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030539202969521286, "signal/frontier_coverage_20/centered_abs_mean": 0.17061007618904114, "signal/frontier_coverage_20/group_std_mean": 0.2188116878271103, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030539202969521286, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030539202969521286, "signal/frontier_coverage_25/centered_abs_mean": 0.1261043816804886, "signal/frontier_coverage_25/group_std_mean": 0.16292393803596497, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00225726836360991, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00225726836360991, "signal/frontier_coverage_5/centered_abs_mean": 0.17061007618904114, "signal/frontier_coverage_5/group_std_mean": 0.2188116878271103, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030539202969521286, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030539202969521286, "signal/frontier_ece_reward/centered_abs_mean": 0.008867009729146957, "signal/frontier_ece_reward/group_std_mean": 0.011277035437524318, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011083762161433696, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011083762161433696, "step": 185 }, { "calibration/aurc": 0.23239763422790666, "calibration/batch_distribution_entropy": 0.8995749973819773, "calibration/buffer_distribution_entropy": 0.9392606380989322, "calibration/confidence_entropy": 0.3906407868577815, "calibration/coverage@0%": 0.055108702299412914, "calibration/coverage@1%": 0.055108702299412914, "calibration/coverage@10%": 0.24500902030332677, "calibration/coverage@15%": 0.3739374388454012, "calibration/coverage@20%": 0.5095011313600782, "calibration/coverage@25%": 0.5989680161448141, "calibration/coverage@30%": 0.6884356653620352, "calibration/coverage@5%": 0.1422570633561644, "calibration/ece": 0.10831119079106703, "calibration/mean_confidence": 0.46993740997275896, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 846.0, "completions/max_terminated_length": 387.8, "completions/mean_length": 171.85595703125, "completions/mean_terminated_length": 171.589794921875, "completions/min_length": 83.4, "completions/min_terminated_length": 83.4, "epoch": 0.608, "grad_norm": 0.0008504785946570337, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 637260475.0, "reward": 1.0233449697494508, "reward_std": 0.06167818456888199, "rewards/accuracy_reward": 0.56572265625, "rewards/brier_reward": 0.8286043047904968, "rewards/confidence_uniqueness_reward": 0.9418840408325195, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0020069938618689775, "rewards/frontier_coverage_1": 0.17736267149448395, "rewards/frontier_coverage_10": 0.17736267149448395, "rewards/frontier_coverage_15": 0.17736267149448395, "rewards/frontier_coverage_20": 0.17532597184181214, "rewards/frontier_coverage_25": 0.13192782700061798, "rewards/frontier_coverage_5": 0.17736267149448395, "rewards/frontier_ece_reward": 0.009247677959501743, "signal/accuracy_reward/centered_abs_mean": 0.084332275390625, "signal/accuracy_reward/group_std_mean": 0.11399843543767929, "signal/accuracy_reward/group_zero_std_frac": 0.6625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0421661376953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0421661376953125, "signal/advantage_abs_mean": 0.04536699652671814, "signal/advantage_pre_scale_abs_mean": 0.04536699652671814, "signal/advantage_pre_scale_std": 0.08548016101121902, "signal/advantage_std": 0.08548016101121902, "signal/brier_reward/centered_abs_mean": 0.11550195217132568, "signal/brier_reward/group_std_mean": 0.15080228447914124, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01443774402141571, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01443774402141571, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02914600744843483, "signal/confidence_uniqueness_reward/group_std_mean": 0.03730083778500557, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036432509310543536, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036432509310543536, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016248196363449097, "signal/frontier_aurc_reward/group_std_mean": 0.0026754786260426043, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.908426904468797e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.908426904468797e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1710997462272644, "signal/frontier_coverage_1/group_std_mean": 0.2212434083223343, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030626854859292507, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030626854859292507, "signal/frontier_coverage_10/centered_abs_mean": 0.1710997462272644, "signal/frontier_coverage_10/group_std_mean": 0.2212434083223343, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030626854859292507, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030626854859292507, "signal/frontier_coverage_15/centered_abs_mean": 0.1710997462272644, "signal/frontier_coverage_15/group_std_mean": 0.2212434083223343, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030626854859292507, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030626854859292507, "signal/frontier_coverage_20/centered_abs_mean": 0.1688907653093338, "signal/frontier_coverage_20/group_std_mean": 0.2184792071580887, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030231445096433164, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030231445096433164, "signal/frontier_coverage_25/centered_abs_mean": 0.11933436542749405, "signal/frontier_coverage_25/group_std_mean": 0.15433123409748079, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021360850892961024, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021360850892961024, "signal/frontier_coverage_5/centered_abs_mean": 0.1710997462272644, "signal/frontier_coverage_5/group_std_mean": 0.2212434083223343, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030626854859292507, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030626854859292507, "signal/frontier_ece_reward/centered_abs_mean": 0.008279498293995857, "signal/frontier_ece_reward/group_std_mean": 0.010457862541079522, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010349372867494821, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010349372867494821, "step": 190 }, { "calibration/aurc": 0.28448200475198304, "calibration/batch_distribution_entropy": 0.9490308362782736, "calibration/buffer_distribution_entropy": 0.9402397748087887, "calibration/confidence_entropy": 0.43260784106219513, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.1171875, "calibration/coverage@15%": 0.230859375, "calibration/coverage@20%": 0.39609375, "calibration/coverage@25%": 0.501953125, "calibration/coverage@30%": 0.572265625, "calibration/coverage@5%": 0.0, "calibration/ece": 0.12011852848050042, "calibration/mean_confidence": 0.4942197901707141, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 426.6, "completions/max_terminated_length": 426.6, "completions/mean_length": 173.60224609375, "completions/mean_terminated_length": 173.60224609375, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.624, "grad_norm": 0.0009778736857697368, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 654382066.0, "reward": 1.0262583494186401, "reward_std": 0.07113818228244781, "rewards/accuracy_reward": 0.57607421875, "rewards/brier_reward": 0.8143329381942749, "rewards/confidence_uniqueness_reward": 0.95128173828125, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002561471750959754, "rewards/frontier_coverage_1": 0.1634742349386215, "rewards/frontier_coverage_10": 0.1634742349386215, "rewards/frontier_coverage_15": 0.1634742349386215, "rewards/frontier_coverage_20": 0.15868508964776992, "rewards/frontier_coverage_25": 0.11462056636810303, "rewards/frontier_coverage_5": 0.1634742349386215, "rewards/frontier_ece_reward": 0.007746654096990824, "signal/accuracy_reward/centered_abs_mean": 0.092962646484375, "signal/accuracy_reward/group_std_mean": 0.124091537296772, "signal/accuracy_reward/group_zero_std_frac": 0.634375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0464813232421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0464813232421875, "signal/advantage_abs_mean": 0.054235681891441345, "signal/advantage_pre_scale_abs_mean": 0.054235681891441345, "signal/advantage_pre_scale_std": 0.09768829345703126, "signal/advantage_std": 0.09768829345703126, "signal/brier_reward/centered_abs_mean": 0.12480789422988892, "signal/brier_reward/group_std_mean": 0.16061947047710418, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015600986778736115, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015600986778736115, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02238917350769043, "signal/confidence_uniqueness_reward/group_std_mean": 0.02832588031888008, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027986466884613037, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027986466884613037, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024256373289972544, "signal/frontier_aurc_reward/group_std_mean": 0.004125529807060957, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.3418908171588554e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.3418908171588554e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1706692099571228, "signal/frontier_coverage_1/group_std_mean": 0.22160598039627075, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030549786519259215, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030549786519259215, "signal/frontier_coverage_10/centered_abs_mean": 0.1706692099571228, "signal/frontier_coverage_10/group_std_mean": 0.22160598039627075, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030549786519259215, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030549786519259215, "signal/frontier_coverage_15/centered_abs_mean": 0.1706692099571228, "signal/frontier_coverage_15/group_std_mean": 0.22160598039627075, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030549786519259215, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030549786519259215, "signal/frontier_coverage_20/centered_abs_mean": 0.163962659239769, "signal/frontier_coverage_20/group_std_mean": 0.21292484402656556, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029349314980208875, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029349314980208875, "signal/frontier_coverage_25/centered_abs_mean": 0.11128398329019547, "signal/frontier_coverage_25/group_std_mean": 0.14506538808345795, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001991983223706484, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001991983223706484, "signal/frontier_coverage_5/centered_abs_mean": 0.1706692099571228, "signal/frontier_coverage_5/group_std_mean": 0.22160598039627075, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030549786519259215, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030549786519259215, "signal/frontier_ece_reward/centered_abs_mean": 0.007852244190871716, "signal/frontier_ece_reward/group_std_mean": 0.009968752972781658, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009815305238589644, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009815305238589644, "step": 195 }, { "calibration/aurc": 0.2749330603883652, "calibration/batch_distribution_entropy": 0.9157628654883949, "calibration/buffer_distribution_entropy": 0.9417889462204929, "calibration/confidence_entropy": 0.415460098321436, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.28203125, "calibration/coverage@15%": 0.34375, "calibration/coverage@20%": 0.41796875, "calibration/coverage@25%": 0.528125, "calibration/coverage@30%": 0.599609375, "calibration/coverage@5%": 0.051953125, "calibration/ece": 0.15501200980610025, "calibration/mean_confidence": 0.5856844897140661, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 787.0, "completions/max_terminated_length": 591.4, "completions/mean_length": 174.65244140625, "completions/mean_terminated_length": 174.38660888671876, "completions/min_length": 82.6, "completions/min_terminated_length": 82.6, "epoch": 0.64, "grad_norm": 0.0010942368535324931, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 671513195.0, "reward": 1.0432765126228332, "reward_std": 0.0722689650952816, "rewards/accuracy_reward": 0.618359375, "rewards/brier_reward": 0.8138016819953918, "rewards/confidence_uniqueness_reward": 0.9505192637443542, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.002926664659753442, "rewards/frontier_coverage_1": 0.1259578838944435, "rewards/frontier_coverage_10": 0.1259578838944435, "rewards/frontier_coverage_15": 0.1259578838944435, "rewards/frontier_coverage_20": 0.11951190680265426, "rewards/frontier_coverage_25": 0.08500352278351783, "rewards/frontier_coverage_5": 0.1259578838944435, "rewards/frontier_ece_reward": 0.008609351143240929, "signal/accuracy_reward/centered_abs_mean": 0.09111328125, "signal/accuracy_reward/group_std_mean": 0.11743369847536086, "signal/accuracy_reward/group_zero_std_frac": 0.678125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045556640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.045556640625, "signal/advantage_abs_mean": 0.05580408796668053, "signal/advantage_pre_scale_abs_mean": 0.05580408796668053, "signal/advantage_pre_scale_std": 0.10402074754238129, "signal/advantage_std": 0.10402074754238129, "signal/brier_reward/centered_abs_mean": 0.12034919857978821, "signal/brier_reward/group_std_mean": 0.15445152223110198, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015043649822473526, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015043649822473526, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02375582978129387, "signal/confidence_uniqueness_reward/group_std_mean": 0.030650369822978973, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029694787226617336, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029694787226617336, "signal/format_reward/centered_abs_mean": 0.000555419921875, "signal/format_reward/group_std_mean": 0.0013209730386734009, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.00334425400942564, "signal/frontier_aurc_reward/group_std_mean": 0.005675982683897018, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.98621423705481e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.98621423705481e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13868267834186554, "signal/frontier_coverage_1/group_std_mean": 0.1825660526752472, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024824199732393025, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024824199732393025, "signal/frontier_coverage_10/centered_abs_mean": 0.13868267834186554, "signal/frontier_coverage_10/group_std_mean": 0.1825660526752472, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024824199732393025, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024824199732393025, "signal/frontier_coverage_15/centered_abs_mean": 0.13868267834186554, "signal/frontier_coverage_15/group_std_mean": 0.1825660526752472, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024824199732393025, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024824199732393025, "signal/frontier_coverage_20/centered_abs_mean": 0.13057875782251357, "signal/frontier_coverage_20/group_std_mean": 0.17225528359413148, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002337359730154276, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002337359730154276, "signal/frontier_coverage_25/centered_abs_mean": 0.08511566817760467, "signal/frontier_coverage_25/group_std_mean": 0.11278624832630157, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015235703671351076, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015235703671351076, "signal/frontier_coverage_5/centered_abs_mean": 0.13868267834186554, "signal/frontier_coverage_5/group_std_mean": 0.1825660526752472, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024824199732393025, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024824199732393025, "signal/frontier_ece_reward/centered_abs_mean": 0.008099580183625221, "signal/frontier_ece_reward/group_std_mean": 0.010334640741348267, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010124475229531527, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010124475229531527, "step": 200 }, { "epoch": 0.64, "eval_calibration/aurc": 0.4334341680670255, "eval_calibration/batch_distribution_entropy": 0.8227332130727718, "eval_calibration/buffer_distribution_entropy": 0.9423817091169264, "eval_calibration/confidence_entropy": 0.38854819135867324, "eval_calibration/coverage@0%": 0.0390625, "eval_calibration/coverage@1%": 0.0390625, "eval_calibration/coverage@10%": 0.1328125, "eval_calibration/coverage@15%": 0.15625, "eval_calibration/coverage@20%": 0.171875, "eval_calibration/coverage@25%": 0.2421875, "eval_calibration/coverage@30%": 0.4140625, "eval_calibration/coverage@5%": 0.0390625, "eval_calibration/ece": 0.225390625, "eval_calibration/mean_confidence": 0.547578125, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 309.75, "eval_completions/max_terminated_length": 309.75, "eval_completions/mean_length": 173.53104782104492, "eval_completions/mean_terminated_length": 173.53104782104492, "eval_completions/min_length": 98.5, "eval_completions/min_terminated_length": 98.5, "eval_loss": 0.0, "eval_num_tokens": 671513195.0, "eval_reward": 0.9446232914924622, "eval_reward_std": 0.2354012466967106, "eval_rewards/accuracy_reward": 0.41796875, "eval_rewards/brier_reward": 0.7981462776660919, "eval_rewards/confidence_uniqueness_reward": 0.8916015625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.004759653122164309, "eval_rewards/frontier_coverage_1": 0.23640722408890724, "eval_rewards/frontier_coverage_10": 0.23640722408890724, "eval_rewards/frontier_coverage_15": 0.23640722408890724, "eval_rewards/frontier_coverage_20": 0.2243974544107914, "eval_rewards/frontier_coverage_25": 0.14654707163572311, "eval_rewards/frontier_coverage_5": 0.23640722408890724, "eval_rewards/frontier_ece_reward": 0.007511715171858668, "eval_runtime": 17.5742, "eval_samples_per_second": 28.451, "eval_signal/accuracy_reward/centered_abs_mean": 0.469970703125, "eval_signal/accuracy_reward/group_std_mean": 0.4919528365135193, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2349853515625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2349853515625, "eval_signal/advantage_abs_mean": 0.2164347991347313, "eval_signal/advantage_pre_scale_abs_mean": 0.2164347991347313, "eval_signal/advantage_pre_scale_std": 0.23300310224294662, "eval_signal/advantage_std": 0.23300310224294662, "eval_signal/brier_reward/centered_abs_mean": 0.2241257205605507, "eval_signal/brier_reward/group_std_mean": 0.2780345007777214, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028015715070068836, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.028015715070068836, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.050201416015625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06067673675715923, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006275177001953125, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006275177001953125, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0064848861657083035, "eval_signal/frontier_aurc_reward/group_std_mean": 0.013537641265429556, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00011607945270952769, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00011607945270952769, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3593551740050316, "eval_signal/frontier_coverage_1/group_std_mean": 0.43338172882795334, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006432457361370325, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006432457361370325, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3593551740050316, "eval_signal/frontier_coverage_10/group_std_mean": 0.43338172882795334, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006432457361370325, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006432457361370325, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3593551740050316, "eval_signal/frontier_coverage_15/group_std_mean": 0.43338172882795334, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006432457361370325, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006432457361370325, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.34003832191228867, "eval_signal/frontier_coverage_20/group_std_mean": 0.4100157469511032, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006086685578338802, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006086685578338802, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.21234332025051117, "eval_signal/frontier_coverage_25/group_std_mean": 0.2588745690882206, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003800945356488228, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003800945356488228, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3593551740050316, "eval_signal/frontier_coverage_5/group_std_mean": 0.43338172882795334, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006432457361370325, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006432457361370325, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.012599717359989882, "eval_signal/frontier_ece_reward/group_std_mean": 0.01608213922008872, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015749646699987352, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015749646699987352, "eval_steps_per_second": 0.228, "step": 200 }, { "calibration/aurc": 0.4232796304790516, "calibration/batch_distribution_entropy": 0.9313572623800672, "calibration/buffer_distribution_entropy": 0.9428452319896881, "calibration/confidence_entropy": 0.4383607192180536, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.013671875, "calibration/coverage@20%": 0.0234375, "calibration/coverage@25%": 0.094140625, "calibration/coverage@30%": 0.26796875, "calibration/coverage@5%": 0.0, "calibration/ece": 0.1789134632389641, "calibration/mean_confidence": 0.563563677522916, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 416.4, "completions/max_terminated_length": 416.4, "completions/mean_length": 172.06005859375, "completions/mean_terminated_length": 172.06005859375, "completions/min_length": 88.4, "completions/min_terminated_length": 88.4, "epoch": 0.656, "grad_norm": 0.0009223693050444126, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 688131634.0, "reward": 1.0271916270256043, "reward_std": 0.07614715248346329, "rewards/accuracy_reward": 0.58486328125, "rewards/brier_reward": 0.8058655500411988, "rewards/confidence_uniqueness_reward": 0.9529876708984375, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.004022358637303114, "rewards/frontier_coverage_1": 0.14106732606887817, "rewards/frontier_coverage_10": 0.14106732606887817, "rewards/frontier_coverage_15": 0.14106732606887817, "rewards/frontier_coverage_20": 0.136381658911705, "rewards/frontier_coverage_25": 0.08988674730062485, "rewards/frontier_coverage_5": 0.14106732606887817, "rewards/frontier_ece_reward": 0.006597818806767464, "signal/accuracy_reward/centered_abs_mean": 0.095013427734375, "signal/accuracy_reward/group_std_mean": 0.12622675597667693, "signal/accuracy_reward/group_zero_std_frac": 0.634375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0475067138671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0475067138671875, "signal/advantage_abs_mean": 0.05853464975953102, "signal/advantage_pre_scale_abs_mean": 0.05853464975953102, "signal/advantage_pre_scale_std": 0.10676633566617966, "signal/advantage_std": 0.10676633566617966, "signal/brier_reward/centered_abs_mean": 0.12741532027721406, "signal/brier_reward/group_std_mean": 0.16260745525360107, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015926915034651757, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015926915034651757, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02163679599761963, "signal/confidence_uniqueness_reward/group_std_mean": 0.027704115584492685, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027045994997024537, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027045994997024537, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.003789227642118931, "signal/frontier_aurc_reward/group_std_mean": 0.006368549633771181, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.782717391615734e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.782717391615734e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14675131738185881, "signal/frontier_coverage_1/group_std_mean": 0.19162927567958832, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026268486864864824, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026268486864864824, "signal/frontier_coverage_10/centered_abs_mean": 0.14675131738185881, "signal/frontier_coverage_10/group_std_mean": 0.19162927567958832, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026268486864864824, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026268486864864824, "signal/frontier_coverage_15/centered_abs_mean": 0.14675131738185881, "signal/frontier_coverage_15/group_std_mean": 0.19162927567958832, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026268486864864824, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026268486864864824, "signal/frontier_coverage_20/centered_abs_mean": 0.14089352786540985, "signal/frontier_coverage_20/group_std_mean": 0.18426248133182527, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002521994011476636, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002521994011476636, "signal/frontier_coverage_25/centered_abs_mean": 0.0880542129278183, "signal/frontier_coverage_25/group_std_mean": 0.11609538346529007, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015761703718453646, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015761703718453646, "signal/frontier_coverage_5/centered_abs_mean": 0.14675131738185881, "signal/frontier_coverage_5/group_std_mean": 0.19162927567958832, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026268486864864824, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026268486864864824, "signal/frontier_ece_reward/centered_abs_mean": 0.008318292908370495, "signal/frontier_ece_reward/group_std_mean": 0.010465490072965622, "signal/frontier_ece_reward/group_zero_std_frac": 0.015625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010397866135463119, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010397866135463119, "step": 205 }, { "calibration/aurc": 0.31886242468045917, "calibration/batch_distribution_entropy": 0.8984277636909574, "calibration/buffer_distribution_entropy": 0.9441781932677561, "calibration/confidence_entropy": 0.39574065662655594, "calibration/coverage@0%": 0.005078125, "calibration/coverage@1%": 0.005078125, "calibration/coverage@10%": 0.078125, "calibration/coverage@15%": 0.101171875, "calibration/coverage@20%": 0.232421875, "calibration/coverage@25%": 0.284765625, "calibration/coverage@30%": 0.398828125, "calibration/coverage@5%": 0.043359375, "calibration/ece": 0.17915248377084506, "calibration/mean_confidence": 0.5848963897512844, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 416.4, "completions/max_terminated_length": 416.4, "completions/mean_length": 169.32275390625, "completions/mean_terminated_length": 169.32275390625, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.672, "grad_norm": 0.0012568546226248145, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 704778939.0, "reward": 1.0182757139205934, "reward_std": 0.07293715327978134, "rewards/accuracy_reward": 0.56669921875, "rewards/brier_reward": 0.8031091213226318, "rewards/confidence_uniqueness_reward": 0.9437248229980468, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0037748562172055244, "rewards/frontier_coverage_1": 0.15582288652658463, "rewards/frontier_coverage_10": 0.15582288652658463, "rewards/frontier_coverage_15": 0.15582288652658463, "rewards/frontier_coverage_20": 0.15092057287693023, "rewards/frontier_coverage_25": 0.10167192667722702, "rewards/frontier_coverage_5": 0.15582288652658463, "rewards/frontier_ece_reward": 0.007688873633742333, "signal/accuracy_reward/centered_abs_mean": 0.095550537109375, "signal/accuracy_reward/group_std_mean": 0.12681576907634734, "signal/accuracy_reward/group_zero_std_frac": 0.63125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0477752685546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0477752685546875, "signal/advantage_abs_mean": 0.055553416907787326, "signal/advantage_pre_scale_abs_mean": 0.055553416907787326, "signal/advantage_pre_scale_std": 0.10308739989995956, "signal/advantage_std": 0.10308739989995956, "signal/brier_reward/centered_abs_mean": 0.12587104141712188, "signal/brier_reward/group_std_mean": 0.16116216480731965, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015733880177140235, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015733880177140235, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027187180519104005, "signal/confidence_uniqueness_reward/group_std_mean": 0.034485659748315814, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033983975648880006, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033983975648880006, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.003499569371342659, "signal/frontier_aurc_reward/group_std_mean": 0.005579089093953371, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.264229159569368e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.264229159569368e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15768970251083375, "signal/frontier_coverage_1/group_std_mean": 0.20257034599781037, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028226455673575402, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028226455673575402, "signal/frontier_coverage_10/centered_abs_mean": 0.15768970251083375, "signal/frontier_coverage_10/group_std_mean": 0.20257034599781037, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028226455673575402, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028226455673575402, "signal/frontier_coverage_15/centered_abs_mean": 0.15768970251083375, "signal/frontier_coverage_15/group_std_mean": 0.20257034599781037, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028226455673575402, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028226455673575402, "signal/frontier_coverage_20/centered_abs_mean": 0.15148624479770662, "signal/frontier_coverage_20/group_std_mean": 0.19488502740859986, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002711603697389364, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002711603697389364, "signal/frontier_coverage_25/centered_abs_mean": 0.09543160498142242, "signal/frontier_coverage_25/group_std_mean": 0.12344460785388947, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017082256963476539, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017082256963476539, "signal/frontier_coverage_5/centered_abs_mean": 0.15768970251083375, "signal/frontier_coverage_5/group_std_mean": 0.20257034599781037, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028226455673575402, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028226455673575402, "signal/frontier_ece_reward/centered_abs_mean": 0.008075537905097008, "signal/frontier_ece_reward/group_std_mean": 0.01002963688224554, "signal/frontier_ece_reward/group_zero_std_frac": 0.0125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001009442238137126, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001009442238137126, "step": 210 }, { "calibration/aurc": 0.3313409360366979, "calibration/batch_distribution_entropy": 0.9063225326335698, "calibration/buffer_distribution_entropy": 0.9449494763071751, "calibration/confidence_entropy": 0.41968592190031984, "calibration/coverage@0%": 0.02265625, "calibration/coverage@1%": 0.02265625, "calibration/coverage@10%": 0.118359375, "calibration/coverage@15%": 0.215234375, "calibration/coverage@20%": 0.332421875, "calibration/coverage@25%": 0.41796875, "calibration/coverage@30%": 0.469140625, "calibration/coverage@5%": 0.06953125, "calibration/ece": 0.142804706148896, "calibration/mean_confidence": 0.5511033871465738, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 441.2, "completions/max_terminated_length": 441.2, "completions/mean_length": 170.13212890625, "completions/mean_terminated_length": 170.13212890625, "completions/min_length": 85.6, "completions/min_terminated_length": 85.6, "epoch": 0.688, "grad_norm": 0.0009384833392687142, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 721475012.0, "reward": 1.040995454788208, "reward_std": 0.06858121380209922, "rewards/accuracy_reward": 0.6099609375, "rewards/brier_reward": 0.8156073451042175, "rewards/confidence_uniqueness_reward": 0.9473793029785156, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002881459705531597, "rewards/frontier_coverage_1": 0.14842391461133958, "rewards/frontier_coverage_10": 0.14842391461133958, "rewards/frontier_coverage_15": 0.14842391461133958, "rewards/frontier_coverage_20": 0.1398467630147934, "rewards/frontier_coverage_25": 0.09119481742382049, "rewards/frontier_coverage_5": 0.14842391461133958, "rewards/frontier_ece_reward": 0.007443835772573948, "signal/accuracy_reward/centered_abs_mean": 0.090283203125, "signal/accuracy_reward/group_std_mean": 0.12654573768377303, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0451416015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0451416015625, "signal/advantage_abs_mean": 0.04979099705815315, "signal/advantage_pre_scale_abs_mean": 0.04979099705815315, "signal/advantage_pre_scale_std": 0.09680136144161225, "signal/advantage_std": 0.09680136144161225, "signal/brier_reward/centered_abs_mean": 0.1112976461648941, "signal/brier_reward/group_std_mean": 0.14523135423660277, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013912205770611762, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013912205770611762, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024501824378967287, "signal/confidence_uniqueness_reward/group_std_mean": 0.03078327625989914, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003062728047370911, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003062728047370911, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023349984083324673, "signal/frontier_aurc_reward/group_std_mean": 0.0037766343681141733, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1796470031840724e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1796470031840724e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15063327848911284, "signal/frontier_coverage_1/group_std_mean": 0.19600337147712707, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026963357347995045, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026963357347995045, "signal/frontier_coverage_10/centered_abs_mean": 0.15063327848911284, "signal/frontier_coverage_10/group_std_mean": 0.19600337147712707, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026963357347995045, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026963357347995045, "signal/frontier_coverage_15/centered_abs_mean": 0.15063327848911284, "signal/frontier_coverage_15/group_std_mean": 0.19600337147712707, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026963357347995045, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026963357347995045, "signal/frontier_coverage_20/centered_abs_mean": 0.14134447574615477, "signal/frontier_coverage_20/group_std_mean": 0.18422182500362397, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002530066017061472, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002530066017061472, "signal/frontier_coverage_25/centered_abs_mean": 0.08790470957756043, "signal/frontier_coverage_25/group_std_mean": 0.11471493542194366, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015734942629933358, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015734942629933358, "signal/frontier_coverage_5/centered_abs_mean": 0.15063327848911284, "signal/frontier_coverage_5/group_std_mean": 0.19600337147712707, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026963357347995045, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026963357347995045, "signal/frontier_ece_reward/centered_abs_mean": 0.007152719609439373, "signal/frontier_ece_reward/group_std_mean": 0.009025894477963448, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008940899511799216, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008940899511799216, "step": 215 }, { "calibration/aurc": 0.27627447367657487, "calibration/batch_distribution_entropy": 0.8754722884416527, "calibration/buffer_distribution_entropy": 0.9454094123206801, "calibration/confidence_entropy": 0.39629469442123594, "calibration/coverage@0%": 0.00625, "calibration/coverage@1%": 0.00625, "calibration/coverage@10%": 0.079296875, "calibration/coverage@15%": 0.17578125, "calibration/coverage@20%": 0.2421875, "calibration/coverage@25%": 0.4421875, "calibration/coverage@30%": 0.6328125, "calibration/coverage@5%": 0.0203125, "calibration/ece": 0.11530746035844426, "calibration/mean_confidence": 0.5567940371824305, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 395.0, "completions/max_terminated_length": 395.0, "completions/mean_length": 170.17822265625, "completions/mean_terminated_length": 170.17822265625, "completions/min_length": 81.4, "completions/min_terminated_length": 81.4, "epoch": 0.704, "grad_norm": 0.0008490359177812934, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 738083781.0, "reward": 1.036322784423828, "reward_std": 0.06404575407505035, "rewards/accuracy_reward": 0.59794921875, "rewards/brier_reward": 0.8232455253601074, "rewards/confidence_uniqueness_reward": 0.9483070373535156, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0026525924913585186, "rewards/frontier_coverage_1": 0.15115799009799957, "rewards/frontier_coverage_10": 0.15115799009799957, "rewards/frontier_coverage_15": 0.15115799009799957, "rewards/frontier_coverage_20": 0.14155119955539702, "rewards/frontier_coverage_25": 0.09430107474327087, "rewards/frontier_coverage_5": 0.15115799009799957, "rewards/frontier_ece_reward": 0.007255460135638714, "signal/accuracy_reward/centered_abs_mean": 0.079205322265625, "signal/accuracy_reward/group_std_mean": 0.10510388016700745, "signal/accuracy_reward/group_zero_std_frac": 0.7, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0396026611328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0396026611328125, "signal/advantage_abs_mean": 0.04904469549655914, "signal/advantage_pre_scale_abs_mean": 0.04904469549655914, "signal/advantage_pre_scale_std": 0.09390701353549957, "signal/advantage_std": 0.09390701353549957, "signal/brier_reward/centered_abs_mean": 0.11579828858375549, "signal/brier_reward/group_std_mean": 0.1494680106639862, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014474786072969436, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014474786072969436, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023987340927124023, "signal/confidence_uniqueness_reward/group_std_mean": 0.03076120503246784, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002998417615890503, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002998417615890503, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020967354532331227, "signal/frontier_aurc_reward/group_std_mean": 0.0033214128809049724, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.753156343009323e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.753156343009323e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14697804301977158, "signal/frontier_coverage_1/group_std_mean": 0.19314327836036682, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002630906878039241, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002630906878039241, "signal/frontier_coverage_10/centered_abs_mean": 0.14697804301977158, "signal/frontier_coverage_10/group_std_mean": 0.19314327836036682, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002630906878039241, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002630906878039241, "signal/frontier_coverage_15/centered_abs_mean": 0.14697804301977158, "signal/frontier_coverage_15/group_std_mean": 0.19314327836036682, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002630906878039241, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002630906878039241, "signal/frontier_coverage_20/centered_abs_mean": 0.1339241683483124, "signal/frontier_coverage_20/group_std_mean": 0.17631075382232667, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002397242630831897, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002397242630831897, "signal/frontier_coverage_25/centered_abs_mean": 0.0837186723947525, "signal/frontier_coverage_25/group_std_mean": 0.11048696041107178, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014985641930252314, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014985641930252314, "signal/frontier_coverage_5/centered_abs_mean": 0.14697804301977158, "signal/frontier_coverage_5/group_std_mean": 0.19314327836036682, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002630906878039241, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002630906878039241, "signal/frontier_ece_reward/centered_abs_mean": 0.006838279590010643, "signal/frontier_ece_reward/group_std_mean": 0.008609758876264095, "signal/frontier_ece_reward/group_zero_std_frac": 0.015625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008547849487513304, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008547849487513304, "step": 220 }, { "calibration/aurc": 0.24601851264573998, "calibration/batch_distribution_entropy": 0.895164252233965, "calibration/buffer_distribution_entropy": 0.9445043289902813, "calibration/confidence_entropy": 0.39696186060770017, "calibration/coverage@0%": 0.008984375, "calibration/coverage@1%": 0.008984375, "calibration/coverage@10%": 0.200390625, "calibration/coverage@15%": 0.3, "calibration/coverage@20%": 0.50625, "calibration/coverage@25%": 0.580078125, "calibration/coverage@30%": 0.65234375, "calibration/coverage@5%": 0.077734375, "calibration/ece": 0.13094241397917852, "calibration/mean_confidence": 0.5467713178315746, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 625.0, "completions/max_terminated_length": 410.8, "completions/mean_length": 171.49765625, "completions/mean_terminated_length": 171.36477355957032, "completions/min_length": 83.8, "completions/min_terminated_length": 83.8, "epoch": 0.72, "grad_norm": 0.001135468017309904, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 754849773.0, "reward": 1.049725556373596, "reward_std": 0.06837212964892388, "rewards/accuracy_reward": 0.62744140625, "rewards/brier_reward": 0.8200330138206482, "rewards/confidence_uniqueness_reward": 0.9464787244796753, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.00218997981864959, "rewards/frontier_coverage_1": 0.14537932425737382, "rewards/frontier_coverage_10": 0.14537932425737382, "rewards/frontier_coverage_15": 0.14537932425737382, "rewards/frontier_coverage_20": 0.1319506511092186, "rewards/frontier_coverage_25": 0.08901111930608749, "rewards/frontier_coverage_5": 0.14537932425737382, "rewards/frontier_ece_reward": 0.0073161104694008825, "signal/accuracy_reward/centered_abs_mean": 0.088348388671875, "signal/accuracy_reward/group_std_mean": 0.12076024860143661, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0441741943359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0441741943359375, "signal/advantage_abs_mean": 0.050909781455993654, "signal/advantage_pre_scale_abs_mean": 0.050909781455993654, "signal/advantage_pre_scale_std": 0.09785163402557373, "signal/advantage_std": 0.09785163402557373, "signal/brier_reward/centered_abs_mean": 0.11528852880001068, "signal/brier_reward/group_std_mean": 0.14847175776958466, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014411066100001335, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014411066100001335, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024321822077035905, "signal/confidence_uniqueness_reward/group_std_mean": 0.030850404873490334, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003040227759629488, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003040227759629488, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016824970487505197, "signal/frontier_aurc_reward/group_std_mean": 0.002684881491586566, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0116694688331336e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0116694688331336e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14414913952350616, "signal/frontier_coverage_1/group_std_mean": 0.19248581528663636, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002580269519239664, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002580269519239664, "signal/frontier_coverage_10/centered_abs_mean": 0.14414913952350616, "signal/frontier_coverage_10/group_std_mean": 0.19248581528663636, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002580269519239664, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002580269519239664, "signal/frontier_coverage_15/centered_abs_mean": 0.14414913952350616, "signal/frontier_coverage_15/group_std_mean": 0.19248581528663636, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002580269519239664, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002580269519239664, "signal/frontier_coverage_20/centered_abs_mean": 0.12617649585008622, "signal/frontier_coverage_20/group_std_mean": 0.16896833181381227, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002258559106849134, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002258559106849134, "signal/frontier_coverage_25/centered_abs_mean": 0.0776784896850586, "signal/frontier_coverage_25/group_std_mean": 0.10378318727016449, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013904449297115207, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013904449297115207, "signal/frontier_coverage_5/centered_abs_mean": 0.14414913952350616, "signal/frontier_coverage_5/group_std_mean": 0.19248581528663636, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002580269519239664, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002580269519239664, "signal/frontier_ece_reward/centered_abs_mean": 0.006297392770648003, "signal/frontier_ece_reward/group_std_mean": 0.007990476116538048, "signal/frontier_ece_reward/group_zero_std_frac": 0.021875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007871740963310003, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007871740963310003, "step": 225 }, { "calibration/aurc": 0.26690781977820344, "calibration/batch_distribution_entropy": 0.891574067576345, "calibration/buffer_distribution_entropy": 0.944466150426838, "calibration/confidence_entropy": 0.3951382923036295, "calibration/coverage@0%": 0.00703125, "calibration/coverage@1%": 0.00703125, "calibration/coverage@10%": 0.106640625, "calibration/coverage@15%": 0.30625, "calibration/coverage@20%": 0.421484375, "calibration/coverage@25%": 0.482421875, "calibration/coverage@30%": 0.601171875, "calibration/coverage@5%": 0.058203125, "calibration/ece": 0.16398913550545838, "calibration/mean_confidence": 0.5757356746185531, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 419.4, "completions/max_terminated_length": 419.4, "completions/mean_length": 169.4328125, "completions/mean_terminated_length": 169.4328125, "completions/min_length": 85.2, "completions/min_terminated_length": 85.2, "epoch": 0.736, "grad_norm": 0.0008367888513021171, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 771524349.0, "reward": 1.0533449411392213, "reward_std": 0.06190124675631523, "rewards/accuracy_reward": 0.63642578125, "rewards/brier_reward": 0.8217647790908813, "rewards/confidence_uniqueness_reward": 0.94322509765625, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0025330462027341127, "rewards/frontier_coverage_1": 0.13909566402435303, "rewards/frontier_coverage_10": 0.13909566402435303, "rewards/frontier_coverage_15": 0.13909566402435303, "rewards/frontier_coverage_20": 0.12320514023303986, "rewards/frontier_coverage_25": 0.08486966341733933, "rewards/frontier_coverage_5": 0.13909566402435303, "rewards/frontier_ece_reward": 0.006958847213536501, "signal/accuracy_reward/centered_abs_mean": 0.078021240234375, "signal/accuracy_reward/group_std_mean": 0.10577622652053834, "signal/accuracy_reward/group_zero_std_frac": 0.690625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0390106201171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0390106201171875, "signal/advantage_abs_mean": 0.04666791334748268, "signal/advantage_pre_scale_abs_mean": 0.04666791334748268, "signal/advantage_pre_scale_std": 0.08996414840221405, "signal/advantage_std": 0.08996414840221405, "signal/brier_reward/centered_abs_mean": 0.11445859372615814, "signal/brier_reward/group_std_mean": 0.15056610703468323, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014307324215769768, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014307324215769768, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027132463455200196, "signal/confidence_uniqueness_reward/group_std_mean": 0.034345941990613936, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033915579319000245, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033915579319000245, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018936245003715157, "signal/frontier_aurc_reward/group_std_mean": 0.002985938685014844, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.389587946003303e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.389587946003303e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14948874711990356, "signal/frontier_coverage_1/group_std_mean": 0.1956830859184265, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002675848500803113, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002675848500803113, "signal/frontier_coverage_10/centered_abs_mean": 0.14948874711990356, "signal/frontier_coverage_10/group_std_mean": 0.1956830859184265, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002675848500803113, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002675848500803113, "signal/frontier_coverage_15/centered_abs_mean": 0.14948874711990356, "signal/frontier_coverage_15/group_std_mean": 0.1956830859184265, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002675848500803113, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002675848500803113, "signal/frontier_coverage_20/centered_abs_mean": 0.12488599568605423, "signal/frontier_coverage_20/group_std_mean": 0.1639753460884094, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022354592569172383, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022354592569172383, "signal/frontier_coverage_25/centered_abs_mean": 0.07906675487756729, "signal/frontier_coverage_25/group_std_mean": 0.10347287952899933, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014152948977425694, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014152948977425694, "signal/frontier_coverage_5/centered_abs_mean": 0.14948874711990356, "signal/frontier_coverage_5/group_std_mean": 0.1956830859184265, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002675848500803113, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002675848500803113, "signal/frontier_ece_reward/centered_abs_mean": 0.006176774390041828, "signal/frontier_ece_reward/group_std_mean": 0.007886088266968728, "signal/frontier_ece_reward/group_zero_std_frac": 0.025, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007720967987552285, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007720967987552285, "step": 230 }, { "calibration/aurc": 0.2508649857745767, "calibration/batch_distribution_entropy": 0.8772631690994259, "calibration/buffer_distribution_entropy": 0.9435599844658459, "calibration/confidence_entropy": 0.38182445660178704, "calibration/coverage@0%": 0.005079653864970645, "calibration/coverage@1%": 0.005079653864970645, "calibration/coverage@10%": 0.19609527886497063, "calibration/coverage@15%": 0.3114305589530333, "calibration/coverage@20%": 0.45413175758317026, "calibration/coverage@25%": 0.5537885273972603, "calibration/coverage@30%": 0.6596868884540117, "calibration/coverage@5%": 0.08867340386497065, "calibration/ece": 0.11011742914387986, "calibration/mean_confidence": 0.5086671170485625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 638.4, "completions/max_terminated_length": 399.0, "completions/mean_length": 168.17314453125, "completions/mean_terminated_length": 168.03954467773437, "completions/min_length": 82.8, "completions/min_terminated_length": 82.8, "epoch": 0.752, "grad_norm": 0.0008424303960055113, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 788473642.0, "reward": 1.0425845623016357, "reward_std": 0.06250675097107887, "rewards/accuracy_reward": 0.6162109375, "rewards/brier_reward": 0.8180248498916626, "rewards/confidence_uniqueness_reward": 0.9436966180801392, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0028093053959310057, "rewards/frontier_coverage_1": 0.13971500843763351, "rewards/frontier_coverage_10": 0.13971500843763351, "rewards/frontier_coverage_15": 0.13971500843763351, "rewards/frontier_coverage_20": 0.11759312674403191, "rewards/frontier_coverage_25": 0.08073695451021194, "rewards/frontier_coverage_5": 0.13971500843763351, "rewards/frontier_ece_reward": 0.006474507041275501, "signal/accuracy_reward/centered_abs_mean": 0.076611328125, "signal/accuracy_reward/group_std_mean": 0.1029132753610611, "signal/accuracy_reward/group_zero_std_frac": 0.7, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0383056640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0383056640625, "signal/advantage_abs_mean": 0.04646777287125588, "signal/advantage_pre_scale_abs_mean": 0.04646777287125588, "signal/advantage_pre_scale_std": 0.09245792478322983, "signal/advantage_std": 0.09245792478322983, "signal/brier_reward/centered_abs_mean": 0.10695935487747192, "signal/brier_reward/group_std_mean": 0.14035816490650177, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01336991935968399, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01336991935968399, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0261497862637043, "signal/confidence_uniqueness_reward/group_std_mean": 0.033126043528318404, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032687232829630374, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032687232829630374, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002035298151895404, "signal/frontier_aurc_reward/group_std_mean": 0.0031914392486214636, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.643183481472079e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.643183481472079e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13366345167160035, "signal/frontier_coverage_1/group_std_mean": 0.17809367179870605, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023925757966935636, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023925757966935636, "signal/frontier_coverage_10/centered_abs_mean": 0.13366345167160035, "signal/frontier_coverage_10/group_std_mean": 0.17809367179870605, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023925757966935636, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023925757966935636, "signal/frontier_coverage_15/centered_abs_mean": 0.13366345167160035, "signal/frontier_coverage_15/group_std_mean": 0.17809367179870605, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023925757966935636, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023925757966935636, "signal/frontier_coverage_20/centered_abs_mean": 0.11026991456747055, "signal/frontier_coverage_20/group_std_mean": 0.14772895574569703, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001973831397481263, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001973831397481263, "signal/frontier_coverage_25/centered_abs_mean": 0.06904419511556625, "signal/frontier_coverage_25/group_std_mean": 0.09221114963293076, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012358910171315074, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012358910171315074, "signal/frontier_coverage_5/centered_abs_mean": 0.13366345167160035, "signal/frontier_coverage_5/group_std_mean": 0.17809367179870605, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023925757966935636, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023925757966935636, "signal/frontier_ece_reward/centered_abs_mean": 0.005866312328726054, "signal/frontier_ece_reward/group_std_mean": 0.007535163220018149, "signal/frontier_ece_reward/group_zero_std_frac": 0.034375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007332890410907567, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007332890410907567, "step": 235 }, { "calibration/aurc": 0.2692653845277622, "calibration/batch_distribution_entropy": 0.9305069965374951, "calibration/buffer_distribution_entropy": 0.9422891039724725, "calibration/confidence_entropy": 0.4174535256865025, "calibration/coverage@0%": 0.04609375, "calibration/coverage@1%": 0.04609375, "calibration/coverage@10%": 0.2765625, "calibration/coverage@15%": 0.321484375, "calibration/coverage@20%": 0.384375, "calibration/coverage@25%": 0.4265625, "calibration/coverage@30%": 0.4875, "calibration/coverage@5%": 0.20546875, "calibration/ece": 0.18388124767598565, "calibration/mean_confidence": 0.5307756600477634, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 408.0, "completions/max_terminated_length": 408.0, "completions/mean_length": 175.7646484375, "completions/mean_terminated_length": 175.7646484375, "completions/min_length": 87.8, "completions/min_terminated_length": 87.8, "epoch": 0.768, "grad_norm": 0.0009248699061572552, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 805206176.0, "reward": 1.0395651578903198, "reward_std": 0.06530485600233078, "rewards/accuracy_reward": 0.59873046875, "rewards/brier_reward": 0.8308544993400574, "rewards/confidence_uniqueness_reward": 0.9474327087402343, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002693426189944148, "rewards/frontier_coverage_1": 0.17641193866729737, "rewards/frontier_coverage_10": 0.17641193866729737, "rewards/frontier_coverage_15": 0.17641193866729737, "rewards/frontier_coverage_20": 0.1520010009407997, "rewards/frontier_coverage_25": 0.0975383996963501, "rewards/frontier_coverage_5": 0.17641193866729737, "rewards/frontier_ece_reward": 0.006915272772312164, "signal/accuracy_reward/centered_abs_mean": 0.080853271484375, "signal/accuracy_reward/group_std_mean": 0.1111733928322792, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0404266357421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0404266357421875, "signal/advantage_abs_mean": 0.04827382862567901, "signal/advantage_pre_scale_abs_mean": 0.04827382862567901, "signal/advantage_pre_scale_std": 0.09414819777011871, "signal/advantage_std": 0.09414819777011871, "signal/brier_reward/centered_abs_mean": 0.11026288121938706, "signal/brier_reward/group_std_mean": 0.14330510795116425, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013782860152423382, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013782860152423382, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02446780204772949, "signal/confidence_uniqueness_reward/group_std_mean": 0.0311261810362339, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030584752559661863, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030584752559661863, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019394330214709044, "signal/frontier_aurc_reward/group_std_mean": 0.0028698711190372705, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.471585005172528e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.471585005172528e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14197806715965272, "signal/frontier_coverage_1/group_std_mean": 0.1858145385980606, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002541407197713852, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002541407197713852, "signal/frontier_coverage_10/centered_abs_mean": 0.14197806715965272, "signal/frontier_coverage_10/group_std_mean": 0.1858145385980606, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002541407197713852, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002541407197713852, "signal/frontier_coverage_15/centered_abs_mean": 0.14197806715965272, "signal/frontier_coverage_15/group_std_mean": 0.1858145385980606, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002541407197713852, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002541407197713852, "signal/frontier_coverage_20/centered_abs_mean": 0.1180332601070404, "signal/frontier_coverage_20/group_std_mean": 0.15438797175884247, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002112795226275921, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002112795226275921, "signal/frontier_coverage_25/centered_abs_mean": 0.0736841842532158, "signal/frontier_coverage_25/group_std_mean": 0.0960970863699913, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013189468532800674, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013189468532800674, "signal/frontier_coverage_5/centered_abs_mean": 0.14197806715965272, "signal/frontier_coverage_5/group_std_mean": 0.1858145385980606, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002541407197713852, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002541407197713852, "signal/frontier_ece_reward/centered_abs_mean": 0.005718752928078175, "signal/frontier_ece_reward/group_std_mean": 0.007219527196139097, "signal/frontier_ece_reward/group_zero_std_frac": 0.0125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007148441160097718, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007148441160097718, "step": 240 }, { "calibration/aurc": 0.33616682511869883, "calibration/batch_distribution_entropy": 0.8847108969382628, "calibration/buffer_distribution_entropy": 0.9401263071041226, "calibration/confidence_entropy": 0.39040401656386514, "calibration/coverage@0%": 0.019921875, "calibration/coverage@1%": 0.019921875, "calibration/coverage@10%": 0.1421875, "calibration/coverage@15%": 0.270703125, "calibration/coverage@20%": 0.309375, "calibration/coverage@25%": 0.351953125, "calibration/coverage@30%": 0.399609375, "calibration/coverage@5%": 0.086328125, "calibration/ece": 0.18135499849271383, "calibration/mean_confidence": 0.5841278451007786, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 431.4, "completions/max_terminated_length": 431.4, "completions/mean_length": 179.7685546875, "completions/mean_terminated_length": 179.7685546875, "completions/min_length": 91.8, "completions/min_terminated_length": 91.8, "epoch": 0.784, "grad_norm": 0.0009112543775700033, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 822221374.0, "reward": 1.0436746239662171, "reward_std": 0.06336919069290162, "rewards/accuracy_reward": 0.62392578125, "rewards/brier_reward": 0.8084685683250428, "rewards/confidence_uniqueness_reward": 0.9493904113769531, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0032422452699393035, "rewards/frontier_coverage_1": 0.11677031964063644, "rewards/frontier_coverage_10": 0.11677031964063644, "rewards/frontier_coverage_15": 0.11677031964063644, "rewards/frontier_coverage_20": 0.0989714041352272, "rewards/frontier_coverage_25": 0.06778252124786377, "rewards/frontier_coverage_5": 0.11677031964063644, "rewards/frontier_ece_reward": 0.0055341293103992936, "signal/accuracy_reward/centered_abs_mean": 0.076141357421875, "signal/accuracy_reward/group_std_mean": 0.10507323890924454, "signal/accuracy_reward/group_zero_std_frac": 0.675, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0380706787109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0380706787109375, "signal/advantage_abs_mean": 0.04689319357275963, "signal/advantage_pre_scale_abs_mean": 0.04689319357275963, "signal/advantage_pre_scale_std": 0.09247228652238845, "signal/advantage_std": 0.09247228652238845, "signal/brier_reward/centered_abs_mean": 0.10638263672590256, "signal/brier_reward/group_std_mean": 0.13932308405637742, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01329782959073782, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01329782959073782, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024085187911987306, "signal/confidence_uniqueness_reward/group_std_mean": 0.030663982033729553, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030106484889984133, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030106484889984133, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024794211611151697, "signal/frontier_aurc_reward/group_std_mean": 0.0039051207713782787, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.438163814484142e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.438163814484142e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.130580872297287, "signal/frontier_coverage_1/group_std_mean": 0.17106756269931794, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002337397518567741, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002337397518567741, "signal/frontier_coverage_10/centered_abs_mean": 0.130580872297287, "signal/frontier_coverage_10/group_std_mean": 0.17106756269931794, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002337397518567741, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002337397518567741, "signal/frontier_coverage_15/centered_abs_mean": 0.130580872297287, "signal/frontier_coverage_15/group_std_mean": 0.17106756269931794, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002337397518567741, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002337397518567741, "signal/frontier_coverage_20/centered_abs_mean": 0.10095408111810684, "signal/frontier_coverage_20/group_std_mean": 0.13289882242679596, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018070780904963612, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018070780904963612, "signal/frontier_coverage_25/centered_abs_mean": 0.06456505954265594, "signal/frontier_coverage_25/group_std_mean": 0.08433714807033539, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011557145044207573, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011557145044207573, "signal/frontier_coverage_5/centered_abs_mean": 0.130580872297287, "signal/frontier_coverage_5/group_std_mean": 0.17106756269931794, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002337397518567741, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002337397518567741, "signal/frontier_ece_reward/centered_abs_mean": 0.005532194208353758, "signal/frontier_ece_reward/group_std_mean": 0.006969755701720715, "signal/frontier_ece_reward/group_zero_std_frac": 0.015625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006915242760442197, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006915242760442197, "step": 245 }, { "calibration/aurc": 0.2185446445677223, "calibration/batch_distribution_entropy": 0.9204417599936402, "calibration/buffer_distribution_entropy": 0.9386189563456048, "calibration/confidence_entropy": 0.41792748593906587, "calibration/coverage@0%": 0.01796875, "calibration/coverage@1%": 0.01796875, "calibration/coverage@10%": 0.17154629403131114, "calibration/coverage@15%": 0.4205425941780822, "calibration/coverage@20%": 0.5659124266144814, "calibration/coverage@25%": 0.645634326076321, "calibration/coverage@30%": 0.7261267734833659, "calibration/coverage@5%": 0.080078125, "calibration/ece": 0.09575703518133583, "calibration/mean_confidence": 0.5476521104531964, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 858.6, "completions/max_terminated_length": 449.0, "completions/mean_length": 184.08486328125, "completions/mean_terminated_length": 183.82038269042968, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.8, "grad_norm": 0.0015918654389679432, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 839116963.0, "reward": 1.063495922088623, "reward_std": 0.065474983304739, "rewards/accuracy_reward": 0.65166015625, "rewards/brier_reward": 0.8490127921104431, "rewards/confidence_uniqueness_reward": 0.9503978848457336, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0026999707799404858, "rewards/frontier_coverage_1": 0.12567180246114731, "rewards/frontier_coverage_10": 0.12567180246114731, "rewards/frontier_coverage_15": 0.12567180246114731, "rewards/frontier_coverage_20": 0.10079433023929596, "rewards/frontier_coverage_25": 0.07056059390306473, "rewards/frontier_coverage_5": 0.12567180246114731, "rewards/frontier_ece_reward": 0.0065612408332526686, "signal/accuracy_reward/centered_abs_mean": 0.084564208984375, "signal/accuracy_reward/group_std_mean": 0.11188896298408509, "signal/accuracy_reward/group_zero_std_frac": 0.675, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0422821044921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0422821044921875, "signal/advantage_abs_mean": 0.049622184783220294, "signal/advantage_pre_scale_abs_mean": 0.049622184783220294, "signal/advantage_pre_scale_std": 0.09784637689590454, "signal/advantage_std": 0.09784637689590454, "signal/brier_reward/centered_abs_mean": 0.09944085478782654, "signal/brier_reward/group_std_mean": 0.12967925816774367, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012430106848478317, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012430106848478317, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023244918510317802, "signal/confidence_uniqueness_reward/group_std_mean": 0.03046169951558113, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029056148137897252, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029056148137897252, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.001967226341366768, "signal/frontier_aurc_reward/group_std_mean": 0.0029548021499067544, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.521335020195693e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.521335020195693e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12609002143144607, "signal/frontier_coverage_1/group_std_mean": 0.1646766871213913, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002257011365145445, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002257011365145445, "signal/frontier_coverage_10/centered_abs_mean": 0.12609002143144607, "signal/frontier_coverage_10/group_std_mean": 0.1646766871213913, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002257011365145445, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002257011365145445, "signal/frontier_coverage_15/centered_abs_mean": 0.12609002143144607, "signal/frontier_coverage_15/group_std_mean": 0.1646766871213913, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002257011365145445, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002257011365145445, "signal/frontier_coverage_20/centered_abs_mean": 0.09504708796739578, "signal/frontier_coverage_20/group_std_mean": 0.1250511020421982, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017013428499922157, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017013428499922157, "signal/frontier_coverage_25/centered_abs_mean": 0.060256894677877426, "signal/frontier_coverage_25/group_std_mean": 0.0785814642906189, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010785983293317258, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010785983293317258, "signal/frontier_coverage_5/centered_abs_mean": 0.12609002143144607, "signal/frontier_coverage_5/group_std_mean": 0.1646766871213913, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002257011365145445, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002257011365145445, "signal/frontier_ece_reward/centered_abs_mean": 0.005481574684381485, "signal/frontier_ece_reward/group_std_mean": 0.006954910047352314, "signal/frontier_ece_reward/group_zero_std_frac": 0.01875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006851968355476856, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006851968355476856, "step": 250 }, { "epoch": 0.8, "eval_calibration/aurc": 0.5091041142943041, "eval_calibration/batch_distribution_entropy": 0.8943448287266812, "eval_calibration/buffer_distribution_entropy": 0.9384914438088658, "eval_calibration/confidence_entropy": 0.424537314501817, "eval_calibration/coverage@0%": 0.0390625, "eval_calibration/coverage@1%": 0.0390625, "eval_calibration/coverage@10%": 0.0390625, "eval_calibration/coverage@15%": 0.0625, "eval_calibration/coverage@20%": 0.1015625, "eval_calibration/coverage@25%": 0.1328125, "eval_calibration/coverage@30%": 0.2109375, "eval_calibration/coverage@5%": 0.0390625, "eval_calibration/ece": 0.24470620265151566, "eval_calibration/mean_confidence": 0.5217374526515156, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 342.0, "eval_completions/max_terminated_length": 342.0, "eval_completions/mean_length": 190.5261993408203, "eval_completions/mean_terminated_length": 190.5261993408203, "eval_completions/min_length": 110.0, "eval_completions/min_terminated_length": 110.0, "eval_loss": 0.0, "eval_num_tokens": 839116963.0, "eval_reward": 0.944745734333992, "eval_reward_std": 0.234944935888052, "eval_rewards/accuracy_reward": 0.427734375, "eval_rewards/brier_reward": 0.784925252199173, "eval_rewards/confidence_uniqueness_reward": 0.899658203125, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0035588888567872345, "eval_rewards/frontier_coverage_1": 0.21038169413805008, "eval_rewards/frontier_coverage_10": 0.21038169413805008, "eval_rewards/frontier_coverage_15": 0.21038169413805008, "eval_rewards/frontier_coverage_20": 0.16055180132389069, "eval_rewards/frontier_coverage_25": 0.0976751372218132, "eval_rewards/frontier_coverage_5": 0.21038169413805008, "eval_rewards/frontier_ece_reward": 0.005469902069307864, "eval_runtime": 18.6142, "eval_samples_per_second": 26.861, "eval_signal/accuracy_reward/centered_abs_mean": 0.4703369140625, "eval_signal/accuracy_reward/group_std_mean": 0.49209941923618317, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23516845703125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23516845703125, "eval_signal/advantage_abs_mean": 0.21568895503878593, "eval_signal/advantage_pre_scale_abs_mean": 0.21568895503878593, "eval_signal/advantage_pre_scale_std": 0.23243148252367973, "eval_signal/advantage_std": 0.23243148252367973, "eval_signal/brier_reward/centered_abs_mean": 0.2361176684498787, "eval_signal/brier_reward/group_std_mean": 0.28941161930561066, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029514708556234837, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.029514708556234837, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04095458984375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04844135884195566, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00511932373046875, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00511932373046875, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004551700607407838, "eval_signal/frontier_aurc_reward/group_std_mean": 0.007785420399159193, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.147543940140167e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.147543940140167e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3619851619005203, "eval_signal/frontier_coverage_1/group_std_mean": 0.4468979686498642, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006479534204117954, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006479534204117954, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3619851619005203, "eval_signal/frontier_coverage_10/group_std_mean": 0.4468979686498642, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006479534204117954, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006479534204117954, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3619851619005203, "eval_signal/frontier_coverage_15/group_std_mean": 0.4468979686498642, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006479534204117954, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006479534204117954, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.2763536870479584, "eval_signal/frontier_coverage_20/group_std_mean": 0.34432317316532135, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004946730565279722, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004946730565279722, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.15370038896799088, "eval_signal/frontier_coverage_25/group_std_mean": 0.1981576457619667, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002751236781477928, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002751236781477928, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3619851619005203, "eval_signal/frontier_coverage_5/group_std_mean": 0.4468979686498642, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006479534204117954, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006479534204117954, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.009484815411269665, "eval_signal/frontier_ece_reward/group_std_mean": 0.011611438822001219, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001185601926408708, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001185601926408708, "eval_steps_per_second": 0.215, "step": 250 }, { "calibration/aurc": 0.22592958052596118, "calibration/batch_distribution_entropy": 0.8792158677852591, "calibration/buffer_distribution_entropy": 0.9373014248801486, "calibration/confidence_entropy": 0.39230068107730476, "calibration/coverage@0%": 0.015625, "calibration/coverage@1%": 0.015625, "calibration/coverage@10%": 0.183203125, "calibration/coverage@15%": 0.30625, "calibration/coverage@20%": 0.41875, "calibration/coverage@25%": 0.70078125, "calibration/coverage@30%": 0.804296875, "calibration/coverage@5%": 0.066796875, "calibration/ece": 0.11634392264448672, "calibration/mean_confidence": 0.5905225369121635, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 419.4, "completions/max_terminated_length": 419.4, "completions/mean_length": 188.4681640625, "completions/mean_terminated_length": 188.4681640625, "completions/min_length": 96.8, "completions/min_terminated_length": 96.8, "epoch": 0.816, "grad_norm": 0.0009058048599399626, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 856146045.0, "reward": 1.0458628177642821, "reward_std": 0.06390021666884423, "rewards/accuracy_reward": 0.631640625, "rewards/brier_reward": 0.8064838409423828, "rewards/confidence_uniqueness_reward": 0.9514259338378906, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.00283654413651675, "rewards/frontier_coverage_1": 0.0995137207210064, "rewards/frontier_coverage_10": 0.0995137207210064, "rewards/frontier_coverage_15": 0.0995137207210064, "rewards/frontier_coverage_20": 0.084238101541996, "rewards/frontier_coverage_25": 0.05991590246558189, "rewards/frontier_coverage_5": 0.0995137207210064, "rewards/frontier_ece_reward": 0.00519214584492147, "signal/accuracy_reward/centered_abs_mean": 0.07982177734375, "signal/accuracy_reward/group_std_mean": 0.11125250309705734, "signal/accuracy_reward/group_zero_std_frac": 0.659375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039910888671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.039910888671875, "signal/advantage_abs_mean": 0.046833574771881104, "signal/advantage_pre_scale_abs_mean": 0.046833574771881104, "signal/advantage_pre_scale_std": 0.09199159741401672, "signal/advantage_std": 0.09199159741401672, "signal/brier_reward/centered_abs_mean": 0.11039517223834991, "signal/brier_reward/group_std_mean": 0.1410120666027069, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013799396529793739, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013799396529793739, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02168407440185547, "signal/confidence_uniqueness_reward/group_std_mean": 0.027639732882380485, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027105093002319338, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027105093002319338, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.00219038394279778, "signal/frontier_aurc_reward/group_std_mean": 0.0035009294748306274, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.920787130482495e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.920787130482495e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1411260485649109, "signal/frontier_coverage_1/group_std_mean": 0.18258497714996338, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025261562783271073, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025261562783271073, "signal/frontier_coverage_10/centered_abs_mean": 0.1411260485649109, "signal/frontier_coverage_10/group_std_mean": 0.18258497714996338, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025261562783271073, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025261562783271073, "signal/frontier_coverage_15/centered_abs_mean": 0.1411260485649109, "signal/frontier_coverage_15/group_std_mean": 0.18258497714996338, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025261562783271073, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025261562783271073, "signal/frontier_coverage_20/centered_abs_mean": 0.10667684972286225, "signal/frontier_coverage_20/group_std_mean": 0.13872886300086976, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019095155643299223, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019095155643299223, "signal/frontier_coverage_25/centered_abs_mean": 0.06732679009437562, "signal/frontier_coverage_25/group_std_mean": 0.08767161518335342, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012051495257765054, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012051495257765054, "signal/frontier_coverage_5/centered_abs_mean": 0.1411260485649109, "signal/frontier_coverage_5/group_std_mean": 0.18258497714996338, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025261562783271073, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025261562783271073, "signal/frontier_ece_reward/centered_abs_mean": 0.005312436446547508, "signal/frontier_ece_reward/group_std_mean": 0.006751040741801262, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006640545558184386, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006640545558184386, "step": 255 }, { "calibration/aurc": 0.28655826197779344, "calibration/batch_distribution_entropy": 0.9274159649176383, "calibration/buffer_distribution_entropy": 0.9362063686127197, "calibration/confidence_entropy": 0.4270325624699677, "calibration/coverage@0%": 0.025390625, "calibration/coverage@1%": 0.025390625, "calibration/coverage@10%": 0.219921875, "calibration/coverage@15%": 0.266796875, "calibration/coverage@20%": 0.330078125, "calibration/coverage@25%": 0.38671875, "calibration/coverage@30%": 0.50546875, "calibration/coverage@5%": 0.190625, "calibration/ece": 0.12614358852833463, "calibration/mean_confidence": 0.508274267709433, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.2, "completions/max_terminated_length": 452.2, "completions/mean_length": 195.576171875, "completions/mean_terminated_length": 195.576171875, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.832, "grad_norm": 0.0007307277410291135, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 873157097.0, "reward": 1.0517502784729005, "reward_std": 0.0625480704009533, "rewards/accuracy_reward": 0.625390625, "rewards/brier_reward": 0.8416979908943176, "rewards/confidence_uniqueness_reward": 0.9532333374023437, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0020900500006973744, "rewards/frontier_coverage_1": 0.14690827578306198, "rewards/frontier_coverage_10": 0.14690827578306198, "rewards/frontier_coverage_15": 0.14690827578306198, "rewards/frontier_coverage_20": 0.11412490308284759, "rewards/frontier_coverage_25": 0.0797498419880867, "rewards/frontier_coverage_5": 0.14690827578306198, "rewards/frontier_ece_reward": 0.005895926151424646, "signal/accuracy_reward/centered_abs_mean": 0.08516845703125, "signal/accuracy_reward/group_std_mean": 0.11420958936214447, "signal/accuracy_reward/group_zero_std_frac": 0.665625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042584228515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.042584228515625, "signal/advantage_abs_mean": 0.04753379821777344, "signal/advantage_pre_scale_abs_mean": 0.04753379821777344, "signal/advantage_pre_scale_std": 0.09404327720403671, "signal/advantage_std": 0.09404327720403671, "signal/brier_reward/centered_abs_mean": 0.09486477375030518, "signal/brier_reward/group_std_mean": 0.1234696313738823, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011858096718788147, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.011858096718788147, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0194500207901001, "signal/confidence_uniqueness_reward/group_std_mean": 0.024607939645648003, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024312525987625123, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024312525987625123, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014977958402596415, "signal/frontier_aurc_reward/group_std_mean": 0.0023715029004961253, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6810545386979355e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6810545386979355e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1350954830646515, "signal/frontier_coverage_1/group_std_mean": 0.17487715780735016, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024182090070098638, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024182090070098638, "signal/frontier_coverage_10/centered_abs_mean": 0.1350954830646515, "signal/frontier_coverage_10/group_std_mean": 0.17487715780735016, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024182090070098638, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024182090070098638, "signal/frontier_coverage_15/centered_abs_mean": 0.1350954830646515, "signal/frontier_coverage_15/group_std_mean": 0.17487715780735016, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024182090070098638, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024182090070098638, "signal/frontier_coverage_20/centered_abs_mean": 0.1005860447883606, "signal/frontier_coverage_20/group_std_mean": 0.13043999224901198, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001800490147434175, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001800490147434175, "signal/frontier_coverage_25/centered_abs_mean": 0.06362877637147904, "signal/frontier_coverage_25/group_std_mean": 0.0820870503783226, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011389550636522472, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011389550636522472, "signal/frontier_coverage_5/centered_abs_mean": 0.1350954830646515, "signal/frontier_coverage_5/group_std_mean": 0.17487715780735016, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024182090070098638, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024182090070098638, "signal/frontier_ece_reward/centered_abs_mean": 0.004868951346725226, "signal/frontier_ece_reward/group_std_mean": 0.006263002008199692, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006086189183406532, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006086189183406532, "step": 260 }, { "calibration/aurc": 0.3079556619466057, "calibration/batch_distribution_entropy": 0.9202860468697315, "calibration/buffer_distribution_entropy": 0.9368981299208997, "calibration/confidence_entropy": 0.4463013291048109, "calibration/coverage@0%": 0.04140625, "calibration/coverage@1%": 0.05234375, "calibration/coverage@10%": 0.193359375, "calibration/coverage@15%": 0.3015625, "calibration/coverage@20%": 0.46484375, "calibration/coverage@25%": 0.520703125, "calibration/coverage@30%": 0.5625, "calibration/coverage@5%": 0.1359375, "calibration/ece": 0.1549067936068524, "calibration/mean_confidence": 0.6070357810612892, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.2, "completions/max_terminated_length": 500.2, "completions/mean_length": 196.15537109375, "completions/mean_terminated_length": 196.15537109375, "completions/min_length": 99.2, "completions/min_terminated_length": 99.2, "epoch": 0.848, "grad_norm": 0.0009925616905093193, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 890180096.0, "reward": 1.0339147329330445, "reward_std": 0.06378009840846062, "rewards/accuracy_reward": 0.59375, "rewards/brier_reward": 0.8303740501403809, "rewards/confidence_uniqueness_reward": 0.9569587707519531, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002599796885624528, "rewards/frontier_coverage_1": 0.13877977132797242, "rewards/frontier_coverage_10": 0.13877977132797242, "rewards/frontier_coverage_15": 0.13877977132797242, "rewards/frontier_coverage_20": 0.10104698985815048, "rewards/frontier_coverage_25": 0.06898890286684037, "rewards/frontier_coverage_5": 0.13877977132797242, "rewards/frontier_ece_reward": 0.0055153296329081055, "signal/accuracy_reward/centered_abs_mean": 0.07659912109375, "signal/accuracy_reward/group_std_mean": 0.10650120824575424, "signal/accuracy_reward/group_zero_std_frac": 0.684375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.038299560546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.038299560546875, "signal/advantage_abs_mean": 0.04687718003988266, "signal/advantage_pre_scale_abs_mean": 0.04687718003988266, "signal/advantage_pre_scale_std": 0.09359803646802903, "signal/advantage_std": 0.09359803646802903, "signal/brier_reward/centered_abs_mean": 0.09589692950248718, "signal/brier_reward/group_std_mean": 0.12390242516994476, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011987116187810898, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.011987116187810898, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01778378486633301, "signal/confidence_uniqueness_reward/group_std_mean": 0.0222574207931757, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002222973108291626, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002222973108291626, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019035086035728455, "signal/frontier_aurc_reward/group_std_mean": 0.0029362429399043322, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.407280346436892e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.407280346436892e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12207435816526413, "signal/frontier_coverage_1/group_std_mean": 0.16072221398353576, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002185130910947919, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002185130910947919, "signal/frontier_coverage_10/centered_abs_mean": 0.12207435816526413, "signal/frontier_coverage_10/group_std_mean": 0.16072221398353576, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002185130910947919, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002185130910947919, "signal/frontier_coverage_15/centered_abs_mean": 0.12207435816526413, "signal/frontier_coverage_15/group_std_mean": 0.16072221398353576, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002185130910947919, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002185130910947919, "signal/frontier_coverage_20/centered_abs_mean": 0.09141052961349487, "signal/frontier_coverage_20/group_std_mean": 0.12017861008644104, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016362484311684966, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016362484311684966, "signal/frontier_coverage_25/centered_abs_mean": 0.057944309711456296, "signal/frontier_coverage_25/group_std_mean": 0.07580447942018509, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010372031247243285, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010372031247243285, "signal/frontier_coverage_5/centered_abs_mean": 0.12207435816526413, "signal/frontier_coverage_5/group_std_mean": 0.16072221398353576, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002185130910947919, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002185130910947919, "signal/frontier_ece_reward/centered_abs_mean": 0.004729109071195126, "signal/frontier_ece_reward/group_std_mean": 0.006064791046082973, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005911386338993907, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005911386338993907, "step": 265 }, { "calibration/aurc": 0.24449910733899616, "calibration/batch_distribution_entropy": 0.9046358315553906, "calibration/buffer_distribution_entropy": 0.937494096852156, "calibration/confidence_entropy": 0.4440393999061635, "calibration/coverage@0%": 0.016019447162426613, "calibration/coverage@1%": 0.016019447162426613, "calibration/coverage@10%": 0.22835127201565558, "calibration/coverage@15%": 0.29830372431506846, "calibration/coverage@20%": 0.39177164872798437, "calibration/coverage@25%": 0.46211472602739717, "calibration/coverage@30%": 0.6633561643835616, "calibration/coverage@5%": 0.1157648911448141, "calibration/ece": 0.14796933480533733, "calibration/mean_confidence": 0.6401590401083131, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 901.0, "completions/max_terminated_length": 610.4, "completions/mean_length": 203.5150390625, "completions/mean_terminated_length": 203.2554504394531, "completions/min_length": 93.4, "completions/min_terminated_length": 93.4, "epoch": 0.864, "grad_norm": 0.001151371281594038, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 907250906.0, "reward": 1.0528210639953612, "reward_std": 0.06563054919242858, "rewards/accuracy_reward": 0.63896484375, "rewards/brier_reward": 0.8275640845298767, "rewards/confidence_uniqueness_reward": 0.9524169325828552, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.002205755584873259, "rewards/frontier_coverage_1": 0.10852386504411697, "rewards/frontier_coverage_10": 0.10852386504411697, "rewards/frontier_coverage_15": 0.10827866345643997, "rewards/frontier_coverage_20": 0.08587422221899033, "rewards/frontier_coverage_25": 0.06290318444371223, "rewards/frontier_coverage_5": 0.10852386504411697, "rewards/frontier_ece_reward": 0.0051742102019488815, "signal/accuracy_reward/centered_abs_mean": 0.087860107421875, "signal/accuracy_reward/group_std_mean": 0.1141038790345192, "signal/accuracy_reward/group_zero_std_frac": 0.678125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0439300537109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0439300537109375, "signal/advantage_abs_mean": 0.05022150054574013, "signal/advantage_pre_scale_abs_mean": 0.05022150054574013, "signal/advantage_pre_scale_std": 0.0995995968580246, "signal/advantage_std": 0.0995995968580246, "signal/brier_reward/centered_abs_mean": 0.10091617107391357, "signal/brier_reward/group_std_mean": 0.1313829392194748, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012614521384239196, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012614521384239196, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020576045289635657, "signal/confidence_uniqueness_reward/group_std_mean": 0.026751379668712615, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002572005661204457, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002572005661204457, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017056349897757173, "signal/frontier_aurc_reward/group_std_mean": 0.0027086624410003423, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.053086475119926e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.053086475119926e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12970556020736695, "signal/frontier_coverage_1/group_std_mean": 0.1699573963880539, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023217292502522467, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023217292502522467, "signal/frontier_coverage_10/centered_abs_mean": 0.12970556020736695, "signal/frontier_coverage_10/group_std_mean": 0.1699573963880539, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023217292502522467, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023217292502522467, "signal/frontier_coverage_15/centered_abs_mean": 0.1289975494146347, "signal/frontier_coverage_15/group_std_mean": 0.16906480193138124, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002309055905789137, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002309055905789137, "signal/frontier_coverage_20/centered_abs_mean": 0.09495823979377746, "signal/frontier_coverage_20/group_std_mean": 0.1250176891684532, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016997524769976735, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016997524769976735, "signal/frontier_coverage_25/centered_abs_mean": 0.06021154895424843, "signal/frontier_coverage_25/group_std_mean": 0.07897393554449081, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001077786646783352, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001077786646783352, "signal/frontier_coverage_5/centered_abs_mean": 0.12970556020736695, "signal/frontier_coverage_5/group_std_mean": 0.1699573963880539, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023217292502522467, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023217292502522467, "signal/frontier_ece_reward/centered_abs_mean": 0.004722311254590749, "signal/frontier_ece_reward/group_std_mean": 0.006080184411257505, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005902889068238437, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005902889068238437, "step": 270 }, { "calibration/aurc": 0.3413525891312035, "calibration/batch_distribution_entropy": 0.9247650910176233, "calibration/buffer_distribution_entropy": 0.9369284539687136, "calibration/confidence_entropy": 0.40555091128192194, "calibration/coverage@0%": 0.012109375, "calibration/coverage@1%": 0.012109375, "calibration/coverage@10%": 0.067578125, "calibration/coverage@15%": 0.103125, "calibration/coverage@20%": 0.149609375, "calibration/coverage@25%": 0.209765625, "calibration/coverage@30%": 0.465234375, "calibration/coverage@5%": 0.042578125, "calibration/ece": 0.15222579926313382, "calibration/mean_confidence": 0.5201320484059387, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 486.8, "completions/max_terminated_length": 486.8, "completions/mean_length": 209.0919921875, "completions/mean_terminated_length": 209.0919921875, "completions/min_length": 104.8, "completions/min_terminated_length": 104.8, "epoch": 0.88, "grad_norm": 0.0009639724157750607, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 924539080.0, "reward": 1.0366958379745483, "reward_std": 0.06528096497058869, "rewards/accuracy_reward": 0.60009765625, "rewards/brier_reward": 0.8175089240074158, "rewards/confidence_uniqueness_reward": 0.9496978759765625, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0024166646180674434, "rewards/frontier_coverage_1": 0.1617402657866478, "rewards/frontier_coverage_10": 0.1617402657866478, "rewards/frontier_coverage_15": 0.15886529982089997, "rewards/frontier_coverage_20": 0.11558039635419845, "rewards/frontier_coverage_25": 0.0815199762582779, "rewards/frontier_coverage_5": 0.1617402657866478, "rewards/frontier_ece_reward": 0.0058576924726367, "signal/accuracy_reward/centered_abs_mean": 0.088812255859375, "signal/accuracy_reward/group_std_mean": 0.11949178874492646, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0444061279296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0444061279296875, "signal/advantage_abs_mean": 0.049381940811872485, "signal/advantage_pre_scale_abs_mean": 0.049381940811872485, "signal/advantage_pre_scale_std": 0.0957550585269928, "signal/advantage_std": 0.0957550585269928, "signal/brier_reward/centered_abs_mean": 0.1052006021142006, "signal/brier_reward/group_std_mean": 0.1361723154783249, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013150075264275075, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013150075264275075, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022104668617248534, "signal/confidence_uniqueness_reward/group_std_mean": 0.02765066474676132, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002763083577156067, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002763083577156067, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019552123732864855, "signal/frontier_aurc_reward/group_std_mean": 0.0030503868591040375, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.49983005435206e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.49983005435206e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14229839742183686, "signal/frontier_coverage_1/group_std_mean": 0.18508260250091552, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002547141211107373, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002547141211107373, "signal/frontier_coverage_10/centered_abs_mean": 0.14229839742183686, "signal/frontier_coverage_10/group_std_mean": 0.18508260250091552, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002547141211107373, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002547141211107373, "signal/frontier_coverage_15/centered_abs_mean": 0.1387757331132889, "signal/frontier_coverage_15/group_std_mean": 0.18052529096603392, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024840855039656162, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024840855039656162, "signal/frontier_coverage_20/centered_abs_mean": 0.10256319344043732, "signal/frontier_coverage_20/group_std_mean": 0.13410095870494843, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018358811037614941, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018358811037614941, "signal/frontier_coverage_25/centered_abs_mean": 0.0661829337477684, "signal/frontier_coverage_25/group_std_mean": 0.08609075546264648, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001184674515388906, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001184674515388906, "signal/frontier_coverage_5/centered_abs_mean": 0.14229839742183686, "signal/frontier_coverage_5/group_std_mean": 0.18508260250091552, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002547141211107373, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002547141211107373, "signal/frontier_ece_reward/centered_abs_mean": 0.004747295938432217, "signal/frontier_ece_reward/group_std_mean": 0.00602139700204134, "signal/frontier_ece_reward/group_zero_std_frac": 0.028125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005934119923040271, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005934119923040271, "step": 275 }, { "calibration/aurc": 0.36725993407894925, "calibration/batch_distribution_entropy": 0.8933985734348454, "calibration/buffer_distribution_entropy": 0.9352886578588653, "calibration/confidence_entropy": 0.39277611637945686, "calibration/coverage@0%": 0.009765625, "calibration/coverage@1%": 0.009765625, "calibration/coverage@10%": 0.0765625, "calibration/coverage@15%": 0.11875, "calibration/coverage@20%": 0.169140625, "calibration/coverage@25%": 0.338671875, "calibration/coverage@30%": 0.43984375, "calibration/coverage@5%": 0.01328125, "calibration/ece": 0.19819743553958977, "calibration/mean_confidence": 0.5616876995536956, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 521.4, "completions/max_terminated_length": 521.4, "completions/mean_length": 218.483984375, "completions/mean_terminated_length": 218.483984375, "completions/min_length": 105.6, "completions/min_terminated_length": 105.6, "epoch": 0.896, "grad_norm": 0.0013427204685285687, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 941887204.0, "reward": 1.034142303466797, "reward_std": 0.061747805774211885, "rewards/accuracy_reward": 0.596484375, "rewards/brier_reward": 0.8217704772949219, "rewards/confidence_uniqueness_reward": 0.9495529174804688, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002957669971510768, "rewards/frontier_coverage_1": 0.14804764688014985, "rewards/frontier_coverage_10": 0.14804764688014985, "rewards/frontier_coverage_15": 0.14638043344020843, "rewards/frontier_coverage_20": 0.1089130237698555, "rewards/frontier_coverage_25": 0.07781351059675216, "rewards/frontier_coverage_5": 0.14804764688014985, "rewards/frontier_ece_reward": 0.004998845653608442, "signal/accuracy_reward/centered_abs_mean": 0.07838134765625, "signal/accuracy_reward/group_std_mean": 0.10633349418640137, "signal/accuracy_reward/group_zero_std_frac": 0.684375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039190673828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.039190673828125, "signal/advantage_abs_mean": 0.04575110375881195, "signal/advantage_pre_scale_abs_mean": 0.04575110375881195, "signal/advantage_pre_scale_std": 0.0925728052854538, "signal/advantage_std": 0.0925728052854538, "signal/brier_reward/centered_abs_mean": 0.09987995773553848, "signal/brier_reward/group_std_mean": 0.13066966235637664, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01248499471694231, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01248499471694231, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021440339088439942, "signal/confidence_uniqueness_reward/group_std_mean": 0.02688387930393219, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026800423860549928, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026800423860549928, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002111524622887373, "signal/frontier_aurc_reward/group_std_mean": 0.0032168001867830752, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7796289325342515e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7796289325342515e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13056060820817947, "signal/frontier_coverage_1/group_std_mean": 0.17031558454036713, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023370349314063787, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023370349314063787, "signal/frontier_coverage_10/centered_abs_mean": 0.13056060820817947, "signal/frontier_coverage_10/group_std_mean": 0.17031558454036713, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023370349314063787, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023370349314063787, "signal/frontier_coverage_15/centered_abs_mean": 0.12676671743392945, "signal/frontier_coverage_15/group_std_mean": 0.16541725099086763, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022691241931170223, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022691241931170223, "signal/frontier_coverage_20/centered_abs_mean": 0.09470010697841644, "signal/frontier_coverage_20/group_std_mean": 0.12389432638883591, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016951319063082337, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016951319063082337, "signal/frontier_coverage_25/centered_abs_mean": 0.06315547078847886, "signal/frontier_coverage_25/group_std_mean": 0.08203252255916596, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011304829269647599, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011304829269647599, "signal/frontier_coverage_5/centered_abs_mean": 0.13056060820817947, "signal/frontier_coverage_5/group_std_mean": 0.17031558454036713, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023370349314063787, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023370349314063787, "signal/frontier_ece_reward/centered_abs_mean": 0.0044867975637316706, "signal/frontier_ece_reward/group_std_mean": 0.005752355605363846, "signal/frontier_ece_reward/group_zero_std_frac": 0.03125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005608496954664588, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005608496954664588, "step": 280 }, { "calibration/aurc": 0.35689277553870924, "calibration/batch_distribution_entropy": 0.9155435810447115, "calibration/buffer_distribution_entropy": 0.9320674021748564, "calibration/confidence_entropy": 0.4032934144385215, "calibration/coverage@0%": 0.016033206947162426, "calibration/coverage@1%": 0.016033206947162426, "calibration/coverage@10%": 0.0832466976516634, "calibration/coverage@15%": 0.1575090203033268, "calibration/coverage@20%": 0.33780271526418787, "calibration/coverage@25%": 0.3948683647260274, "calibration/coverage@30%": 0.4992042257827789, "calibration/coverage@5%": 0.021507307974559686, "calibration/ece": 0.16610141089467823, "calibration/mean_confidence": 0.5162139753059, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 845.8, "completions/max_terminated_length": 672.4, "completions/mean_length": 230.59306640625, "completions/mean_terminated_length": 230.3378448486328, "completions/min_length": 111.2, "completions/min_terminated_length": 111.2, "epoch": 0.912, "grad_norm": 0.0006971288821659982, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 959299773.0, "reward": 1.0343021631240845, "reward_std": 0.06137159615755081, "rewards/accuracy_reward": 0.59765625, "rewards/brier_reward": 0.8208606958389282, "rewards/confidence_uniqueness_reward": 0.9507390022277832, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002479233802296221, "rewards/frontier_coverage_1": 0.1423247776925564, "rewards/frontier_coverage_10": 0.1423247776925564, "rewards/frontier_coverage_15": 0.13940538018941878, "rewards/frontier_coverage_20": 0.10835960805416107, "rewards/frontier_coverage_25": 0.07879922837018967, "rewards/frontier_coverage_5": 0.1423247776925564, "rewards/frontier_ece_reward": 0.005422212090343237, "signal/accuracy_reward/centered_abs_mean": 0.0740234375, "signal/accuracy_reward/group_std_mean": 0.10433268696069717, "signal/accuracy_reward/group_zero_std_frac": 0.68125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03701171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03701171875, "signal/advantage_abs_mean": 0.04508618414402008, "signal/advantage_pre_scale_abs_mean": 0.04508618414402008, "signal/advantage_pre_scale_std": 0.08746702373027801, "signal/advantage_std": 0.08746702373027801, "signal/brier_reward/centered_abs_mean": 0.11010385453701019, "signal/brier_reward/group_std_mean": 0.1430896759033203, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013762981817126273, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013762981817126273, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022147323563694955, "signal/confidence_uniqueness_reward/group_std_mean": 0.029007868096232414, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027684154454618694, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027684154454618694, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.001991865341551602, "signal/frontier_aurc_reward/group_std_mean": 0.0031465083360672, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.565438746591099e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.565438746591099e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15080978870391845, "signal/frontier_coverage_1/group_std_mean": 0.19573720097541808, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002699495013803244, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002699495013803244, "signal/frontier_coverage_10/centered_abs_mean": 0.15080978870391845, "signal/frontier_coverage_10/group_std_mean": 0.19573720097541808, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002699495013803244, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002699495013803244, "signal/frontier_coverage_15/centered_abs_mean": 0.14642856270074844, "signal/frontier_coverage_15/group_std_mean": 0.19005082845687865, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026210711803287268, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026210711803287268, "signal/frontier_coverage_20/centered_abs_mean": 0.11272255331277847, "signal/frontier_coverage_20/group_std_mean": 0.14563319385051726, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020177337806671857, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020177337806671857, "signal/frontier_coverage_25/centered_abs_mean": 0.07451429963111877, "signal/frontier_coverage_25/group_std_mean": 0.0956909030675888, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013338059186935424, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013338059186935424, "signal/frontier_coverage_5/centered_abs_mean": 0.15080978870391845, "signal/frontier_coverage_5/group_std_mean": 0.19573720097541808, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002699495013803244, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002699495013803244, "signal/frontier_ece_reward/centered_abs_mean": 0.004899371787905693, "signal/frontier_ece_reward/group_std_mean": 0.006204710435122251, "signal/frontier_ece_reward/group_zero_std_frac": 0.021875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006124214734882117, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006124214734882117, "step": 285 }, { "calibration/aurc": 0.4121583044665427, "calibration/batch_distribution_entropy": 0.9241065674179074, "calibration/buffer_distribution_entropy": 0.9318724189860458, "calibration/confidence_entropy": 0.4084272631109712, "calibration/coverage@0%": 0.009375, "calibration/coverage@1%": 0.009375, "calibration/coverage@10%": 0.01640625, "calibration/coverage@15%": 0.01640625, "calibration/coverage@20%": 0.022265625, "calibration/coverage@25%": 0.09375, "calibration/coverage@30%": 0.271875, "calibration/coverage@5%": 0.009375, "calibration/ece": 0.1893808706693447, "calibration/mean_confidence": 0.5403191218712677, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 527.6, "completions/max_terminated_length": 527.6, "completions/mean_length": 233.3056640625, "completions/mean_terminated_length": 233.3056640625, "completions/min_length": 117.2, "completions/min_terminated_length": 117.2, "epoch": 0.928, "grad_norm": 0.0006464759935624897, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 976715639.0, "reward": 1.0367928266525268, "reward_std": 0.06266704574227333, "rewards/accuracy_reward": 0.6103515625, "rewards/brier_reward": 0.8035065650939941, "rewards/confidence_uniqueness_reward": 0.9388595581054687, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.003317755740135908, "rewards/frontier_coverage_1": 0.13958473801612853, "rewards/frontier_coverage_10": 0.13958473801612853, "rewards/frontier_coverage_15": 0.13682184219360352, "rewards/frontier_coverage_20": 0.1055952787399292, "rewards/frontier_coverage_25": 0.07915615886449814, "rewards/frontier_coverage_5": 0.13958473801612853, "rewards/frontier_ece_reward": 0.005030411807820201, "signal/accuracy_reward/centered_abs_mean": 0.07967529296875, "signal/accuracy_reward/group_std_mean": 0.1087621882557869, "signal/accuracy_reward/group_zero_std_frac": 0.675, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039837646484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.039837646484375, "signal/advantage_abs_mean": 0.04707158431410789, "signal/advantage_pre_scale_abs_mean": 0.04707158431410789, "signal/advantage_pre_scale_std": 0.09145613610744477, "signal/advantage_std": 0.09145613610744477, "signal/brier_reward/centered_abs_mean": 0.11411072015762329, "signal/brier_reward/group_std_mean": 0.1472643107175827, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014263840019702911, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014263840019702911, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029550457000732423, "signal/confidence_uniqueness_reward/group_std_mean": 0.037665216624736785, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003693807125091553, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003693807125091553, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029917174484580754, "signal/frontier_aurc_reward/group_std_mean": 0.004943959508091211, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3551741439150645e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3551741439150645e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14711553156375884, "signal/frontier_coverage_1/group_std_mean": 0.19341881871223449, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026333680376410483, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026333680376410483, "signal/frontier_coverage_10/centered_abs_mean": 0.14711553156375884, "signal/frontier_coverage_10/group_std_mean": 0.19341881871223449, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026333680376410483, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026333680376410483, "signal/frontier_coverage_15/centered_abs_mean": 0.14292107820510863, "signal/frontier_coverage_15/group_std_mean": 0.1878840833902359, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025582872331142426, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025582872331142426, "signal/frontier_coverage_20/centered_abs_mean": 0.10920373499393463, "signal/frontier_coverage_20/group_std_mean": 0.14428247809410094, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019547467585653068, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019547467585653068, "signal/frontier_coverage_25/centered_abs_mean": 0.07394303530454635, "signal/frontier_coverage_25/group_std_mean": 0.09636994302272797, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001323580276221037, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001323580276221037, "signal/frontier_coverage_5/centered_abs_mean": 0.14711553156375884, "signal/frontier_coverage_5/group_std_mean": 0.19341881871223449, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026333680376410483, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026333680376410483, "signal/frontier_ece_reward/centered_abs_mean": 0.005132979806512594, "signal/frontier_ece_reward/group_std_mean": 0.00646492512896657, "signal/frontier_ece_reward/group_zero_std_frac": 0.040625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006416224758140742, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006416224758140742, "step": 290 }, { "calibration/aurc": 0.2322381487413944, "calibration/batch_distribution_entropy": 0.9131056212873986, "calibration/buffer_distribution_entropy": 0.9310349150148444, "calibration/confidence_entropy": 0.3937384896123201, "calibration/coverage@0%": 0.057421875, "calibration/coverage@1%": 0.057421875, "calibration/coverage@10%": 0.2296875, "calibration/coverage@15%": 0.35546875, "calibration/coverage@20%": 0.47578125, "calibration/coverage@25%": 0.57265625, "calibration/coverage@30%": 0.6640625, "calibration/coverage@5%": 0.1171875, "calibration/ece": 0.11506757812499999, "calibration/mean_confidence": 0.550976953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 759.8, "completions/max_terminated_length": 550.8, "completions/mean_length": 242.756640625, "completions/mean_terminated_length": 242.6308380126953, "completions/min_length": 120.4, "completions/min_terminated_length": 120.4, "epoch": 0.944, "grad_norm": 0.000754023902118206, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 994176891.0, "reward": 1.039714527130127, "reward_std": 0.07349804490804672, "rewards/accuracy_reward": 0.609765625, "rewards/brier_reward": 0.8154372692108154, "rewards/confidence_uniqueness_reward": 0.9433710098266601, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.003159674210473895, "rewards/frontier_coverage_1": 0.15151877403259278, "rewards/frontier_coverage_10": 0.15151877403259278, "rewards/frontier_coverage_15": 0.14611534178256988, "rewards/frontier_coverage_20": 0.11554400622844696, "rewards/frontier_coverage_25": 0.08363842219114304, "rewards/frontier_coverage_5": 0.15151877403259278, "rewards/frontier_ece_reward": 0.006149538699537516, "signal/accuracy_reward/centered_abs_mean": 0.105615234375, "signal/accuracy_reward/group_std_mean": 0.13928218185901642, "signal/accuracy_reward/group_zero_std_frac": 0.603125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0528076171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0528076171875, "signal/advantage_abs_mean": 0.05543512031435967, "signal/advantage_pre_scale_abs_mean": 0.05543512031435967, "signal/advantage_pre_scale_std": 0.1069907784461975, "signal/advantage_std": 0.1069907784461975, "signal/brier_reward/centered_abs_mean": 0.11077233403921127, "signal/brier_reward/group_std_mean": 0.14170551002025605, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01384654175490141, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01384654175490141, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026658696308732032, "signal/confidence_uniqueness_reward/group_std_mean": 0.03480346091091633, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003332337038591504, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003332337038591504, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002645864570513368, "signal/frontier_aurc_reward/group_std_mean": 0.004210462234914303, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7360976168420166e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7360976168420166e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.146555095911026, "signal/frontier_coverage_1/group_std_mean": 0.18947678804397583, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026233360171318055, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026233360171318055, "signal/frontier_coverage_10/centered_abs_mean": 0.146555095911026, "signal/frontier_coverage_10/group_std_mean": 0.18947678804397583, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026233360171318055, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026233360171318055, "signal/frontier_coverage_15/centered_abs_mean": 0.14236142635345458, "signal/frontier_coverage_15/group_std_mean": 0.18402716219425203, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025482695084065197, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025482695084065197, "signal/frontier_coverage_20/centered_abs_mean": 0.10681335628032684, "signal/frontier_coverage_20/group_std_mean": 0.13840672373771667, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019119590055197478, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019119590055197478, "signal/frontier_coverage_25/centered_abs_mean": 0.07202518582344056, "signal/frontier_coverage_25/group_std_mean": 0.09276955872774124, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001289250748232007, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001289250748232007, "signal/frontier_coverage_5/centered_abs_mean": 0.146555095911026, "signal/frontier_coverage_5/group_std_mean": 0.18947678804397583, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026233360171318055, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026233360171318055, "signal/frontier_ece_reward/centered_abs_mean": 0.0051291721872985365, "signal/frontier_ece_reward/group_std_mean": 0.006471920944750309, "signal/frontier_ece_reward/group_zero_std_frac": 0.0375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006411465234123171, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006411465234123171, "step": 295 }, { "calibration/aurc": 0.33955971949694363, "calibration/batch_distribution_entropy": 0.8857733162872842, "calibration/buffer_distribution_entropy": 0.9298230128935456, "calibration/confidence_entropy": 0.3718501892191845, "calibration/coverage@0%": 0.00703125, "calibration/coverage@1%": 0.00703125, "calibration/coverage@10%": 0.02578125, "calibration/coverage@15%": 0.146875, "calibration/coverage@20%": 0.31640625, "calibration/coverage@25%": 0.36875, "calibration/coverage@30%": 0.405859375, "calibration/coverage@5%": 0.00703125, "calibration/ece": 0.1850365021007531, "calibration/mean_confidence": 0.5966286162392834, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 773.0, "completions/max_terminated_length": 586.2, "completions/mean_length": 245.58359375, "completions/mean_terminated_length": 245.33098449707032, "completions/min_length": 121.4, "completions/min_terminated_length": 121.4, "epoch": 0.96, "grad_norm": 0.0005919244140386581, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 1011631987.0, "reward": 1.0362752437591554, "reward_std": 0.06122729256749153, "rewards/accuracy_reward": 0.59599609375, "rewards/brier_reward": 0.829588258266449, "rewards/confidence_uniqueness_reward": 0.9436734795570374, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0040486148092895744, "rewards/frontier_coverage_1": 0.17092968821525573, "rewards/frontier_coverage_10": 0.17092968821525573, "rewards/frontier_coverage_15": 0.16497417092323302, "rewards/frontier_coverage_20": 0.12459131479263305, "rewards/frontier_coverage_25": 0.08991340845823288, "rewards/frontier_coverage_5": 0.17092968821525573, "rewards/frontier_ece_reward": 0.006544246431440115, "signal/accuracy_reward/centered_abs_mean": 0.076776123046875, "signal/accuracy_reward/group_std_mean": 0.10603935718536377, "signal/accuracy_reward/group_zero_std_frac": 0.68125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0383880615234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0383880615234375, "signal/advantage_abs_mean": 0.04462712332606315, "signal/advantage_pre_scale_abs_mean": 0.04462712332606315, "signal/advantage_pre_scale_std": 0.09253625869750977, "signal/advantage_std": 0.09253625869750977, "signal/brier_reward/centered_abs_mean": 0.09673822671175003, "signal/brier_reward/group_std_mean": 0.12821625024080277, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012092278338968754, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012092278338968754, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025702812895178794, "signal/confidence_uniqueness_reward/group_std_mean": 0.03373255953192711, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032128516118973493, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032128516118973493, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003233627835288644, "signal/frontier_aurc_reward/group_std_mean": 0.004815721325576306, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.7881935936165975e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.7881935936165975e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12415488362312317, "signal/frontier_coverage_1/group_std_mean": 0.16360531747341156, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022223723120987416, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022223723120987416, "signal/frontier_coverage_10/centered_abs_mean": 0.12415488362312317, "signal/frontier_coverage_10/group_std_mean": 0.16360531747341156, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022223723120987416, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022223723120987416, "signal/frontier_coverage_15/centered_abs_mean": 0.1208252727985382, "signal/frontier_coverage_15/group_std_mean": 0.1590008407831192, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002162772277370095, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002162772277370095, "signal/frontier_coverage_20/centered_abs_mean": 0.0897542342543602, "signal/frontier_coverage_20/group_std_mean": 0.11785899251699447, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016066007083281875, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016066007083281875, "signal/frontier_coverage_25/centered_abs_mean": 0.0641759216785431, "signal/frontier_coverage_25/group_std_mean": 0.08313264548778534, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011487489799037575, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011487489799037575, "signal/frontier_coverage_5/centered_abs_mean": 0.12415488362312317, "signal/frontier_coverage_5/group_std_mean": 0.16360531747341156, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022223723120987416, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022223723120987416, "signal/frontier_ece_reward/centered_abs_mean": 0.004897785000503063, "signal/frontier_ece_reward/group_std_mean": 0.0062327212654054165, "signal/frontier_ece_reward/group_zero_std_frac": 0.028125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006122231250628829, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006122231250628829, "step": 300 }, { "epoch": 0.96, "eval_calibration/aurc": 0.46128748795665087, "eval_calibration/batch_distribution_entropy": 0.8756256531737828, "eval_calibration/buffer_distribution_entropy": 0.9280413379460178, "eval_calibration/confidence_entropy": 0.41193622937777086, "eval_calibration/coverage@0%": 0.0703125, "eval_calibration/coverage@1%": 0.0703125, "eval_calibration/coverage@10%": 0.0703125, "eval_calibration/coverage@15%": 0.1171875, "eval_calibration/coverage@20%": 0.140625, "eval_calibration/coverage@25%": 0.1953125, "eval_calibration/coverage@30%": 0.2109375, "eval_calibration/coverage@5%": 0.0703125, "eval_calibration/ece": 0.21648227695323158, "eval_calibration/mean_confidence": 0.5482010269532316, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 456.75, "eval_completions/max_terminated_length": 456.75, "eval_completions/mean_length": 251.32495880126953, "eval_completions/mean_terminated_length": 251.32495880126953, "eval_completions/min_length": 142.0, "eval_completions/min_terminated_length": 142.0, "eval_loss": 0.0, "eval_num_tokens": 1011631987.0, "eval_reward": 0.9415136426687241, "eval_reward_std": 0.23694248497486115, "eval_rewards/accuracy_reward": 0.421875, "eval_rewards/brier_reward": 0.7852368354797363, "eval_rewards/confidence_uniqueness_reward": 0.895263671875, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0037054395070299506, "eval_rewards/frontier_coverage_1": 0.21414391696453094, "eval_rewards/frontier_coverage_10": 0.21414391696453094, "eval_rewards/frontier_coverage_15": 0.2132040672004223, "eval_rewards/frontier_coverage_20": 0.1507317405194044, "eval_rewards/frontier_coverage_25": 0.10258364118635654, "eval_rewards/frontier_coverage_5": 0.21414391696453094, "eval_rewards/frontier_ece_reward": 0.005837466917000711, "eval_runtime": 22.9077, "eval_samples_per_second": 21.827, "eval_signal/accuracy_reward/centered_abs_mean": 0.4697265625, "eval_signal/accuracy_reward/group_std_mean": 0.4920375719666481, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23486328125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23486328125, "eval_signal/advantage_abs_mean": 0.21758165583014488, "eval_signal/advantage_pre_scale_abs_mean": 0.21758165583014488, "eval_signal/advantage_pre_scale_std": 0.23440348356962204, "eval_signal/advantage_std": 0.23440348356962204, "eval_signal/brier_reward/centered_abs_mean": 0.23594782128930092, "eval_signal/brier_reward/group_std_mean": 0.2868555709719658, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029493477661162615, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.029493477661162615, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0440826416015625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.053775970824062824, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0055103302001953125, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0055103302001953125, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004771354433614761, "eval_signal/frontier_aurc_reward/group_std_mean": 0.009579721372574568, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.540724047634285e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.540724047634285e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3561520427465439, "eval_signal/frontier_coverage_1/group_std_mean": 0.4406690001487732, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006375121418386698, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006375121418386698, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3561520427465439, "eval_signal/frontier_coverage_10/group_std_mean": 0.4406690001487732, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006375121418386698, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006375121418386698, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3545154333114624, "eval_signal/frontier_coverage_15/group_std_mean": 0.4385922998189926, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006345825968310237, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006345825968310237, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.24524880945682526, "eval_signal/frontier_coverage_20/group_std_mean": 0.30554553121328354, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0043899534502997994, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0043899534502997994, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.14788980782032013, "eval_signal/frontier_coverage_25/group_std_mean": 0.18959416821599007, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026472274912521243, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026472274912521243, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3561520427465439, "eval_signal/frontier_coverage_5/group_std_mean": 0.4406690001487732, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006375121418386698, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006375121418386698, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.009522880194708705, "eval_signal/frontier_ece_reward/group_std_mean": 0.011444894364103675, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011903600243385881, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011903600243385881, "eval_steps_per_second": 0.175, "step": 300 }, { "calibration/aurc": 0.2459965736429119, "calibration/batch_distribution_entropy": 0.9147780574307252, "calibration/buffer_distribution_entropy": 0.9281321077000616, "calibration/confidence_entropy": 0.4052235295442716, "calibration/coverage@0%": 0.029296875, "calibration/coverage@1%": 0.029296875, "calibration/coverage@10%": 0.279296875, "calibration/coverage@15%": 0.39609375, "calibration/coverage@20%": 0.5109375, "calibration/coverage@25%": 0.56796875, "calibration/coverage@30%": 0.62109375, "calibration/coverage@5%": 0.07265625, "calibration/ece": 0.12373667296527428, "calibration/mean_confidence": 0.5660033672845597, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 1153.4, "completions/max_terminated_length": 594.8, "completions/mean_length": 248.48974609375, "completions/mean_terminated_length": 247.73369445800782, "completions/min_length": 120.4, "completions/min_terminated_length": 120.4, "epoch": 0.976, "grad_norm": 0.000649201450869441, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 1029037642.0, "reward": 1.0403510093688966, "reward_std": 0.061715726554393766, "rewards/accuracy_reward": 0.61328125, "rewards/brier_reward": 0.819303834438324, "rewards/confidence_uniqueness_reward": 0.9493087649345398, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.002960939984768629, "rewards/frontier_coverage_1": 0.12880419678986071, "rewards/frontier_coverage_10": 0.12880419678986071, "rewards/frontier_coverage_15": 0.12648025006055832, "rewards/frontier_coverage_20": 0.10162520408630371, "rewards/frontier_coverage_25": 0.07422572486102581, "rewards/frontier_coverage_5": 0.12880419678986071, "rewards/frontier_ece_reward": 0.005600927863270044, "signal/accuracy_reward/centered_abs_mean": 0.07640380859375, "signal/accuracy_reward/group_std_mean": 0.106211369484663, "signal/accuracy_reward/group_zero_std_frac": 0.678125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.038201904296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.038201904296875, "signal/advantage_abs_mean": 0.04466826543211937, "signal/advantage_pre_scale_abs_mean": 0.04466826543211937, "signal/advantage_pre_scale_std": 0.09117967635393143, "signal/advantage_std": 0.09117967635393143, "signal/brier_reward/centered_abs_mean": 0.09978740066289901, "signal/brier_reward/group_std_mean": 0.13079679906368255, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012473425082862376, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012473425082862376, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023243288323283196, "signal/confidence_uniqueness_reward/group_std_mean": 0.031104259938001633, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029054110404103995, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029054110404103995, "signal/format_reward/centered_abs_mean": 0.001275634765625, "signal/format_reward/group_std_mean": 0.002798851951956749, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006378173828125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006378173828125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002286581532098353, "signal/frontier_aurc_reward/group_std_mean": 0.003588835708796978, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0929808164946735e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0929808164946735e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13535113930702208, "signal/frontier_coverage_1/group_std_mean": 0.1770560622215271, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024227853398770096, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024227853398770096, "signal/frontier_coverage_10/centered_abs_mean": 0.13535113930702208, "signal/frontier_coverage_10/group_std_mean": 0.1770560622215271, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024227853398770096, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024227853398770096, "signal/frontier_coverage_15/centered_abs_mean": 0.13063560128211976, "signal/frontier_coverage_15/group_std_mean": 0.17087839543819427, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023383772233501075, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023383772233501075, "signal/frontier_coverage_20/centered_abs_mean": 0.09394551813602448, "signal/frontier_coverage_20/group_std_mean": 0.12267908304929734, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016816247487440705, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016816247487440705, "signal/frontier_coverage_25/centered_abs_mean": 0.06524330824613571, "signal/frontier_coverage_25/group_std_mean": 0.08443802744150161, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011678551556542516, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011678551556542516, "signal/frontier_coverage_5/centered_abs_mean": 0.13535113930702208, "signal/frontier_coverage_5/group_std_mean": 0.1770560622215271, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024227853398770096, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024227853398770096, "signal/frontier_ece_reward/centered_abs_mean": 0.004773552063852549, "signal/frontier_ece_reward/group_std_mean": 0.006137214787304402, "signal/frontier_ece_reward/group_zero_std_frac": 0.03125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005966940079815686, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005966940079815686, "step": 305 }, { "calibration/aurc": 0.3583176928401651, "calibration/batch_distribution_entropy": 0.9294162782931643, "calibration/buffer_distribution_entropy": 0.9286617297056032, "calibration/confidence_entropy": 0.40752941939728976, "calibration/coverage@0%": 0.01484375, "calibration/coverage@1%": 0.01484375, "calibration/coverage@10%": 0.05625, "calibration/coverage@15%": 0.107421875, "calibration/coverage@20%": 0.136328125, "calibration/coverage@25%": 0.28046875, "calibration/coverage@30%": 0.403515625, "calibration/coverage@5%": 0.01875, "calibration/ece": 0.16133625666639104, "calibration/mean_confidence": 0.49935197729238023, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 571.6, "completions/max_terminated_length": 571.6, "completions/mean_length": 241.47685546875, "completions/mean_terminated_length": 241.47685546875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.992, "grad_norm": 0.0007859015022404492, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 1046638845.0, "reward": 1.0216437101364135, "reward_std": 0.06261338591575623, "rewards/accuracy_reward": 0.573828125, "rewards/brier_reward": 0.8116239428520202, "rewards/confidence_uniqueness_reward": 0.9438789367675782, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0030248827766627072, "rewards/frontier_coverage_1": 0.1581905961036682, "rewards/frontier_coverage_10": 0.1581905961036682, "rewards/frontier_coverage_15": 0.15104590654373168, "rewards/frontier_coverage_20": 0.1126480221748352, "rewards/frontier_coverage_25": 0.08232222348451615, "rewards/frontier_coverage_5": 0.1581905961036682, "rewards/frontier_ece_reward": 0.00525932852178812, "signal/accuracy_reward/centered_abs_mean": 0.0842041015625, "signal/accuracy_reward/group_std_mean": 0.11003593057394027, "signal/accuracy_reward/group_zero_std_frac": 0.690625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04210205078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04210205078125, "signal/advantage_abs_mean": 0.04805062413215637, "signal/advantage_pre_scale_abs_mean": 0.04805062413215637, "signal/advantage_pre_scale_std": 0.09543496072292328, "signal/advantage_std": 0.09543496072292328, "signal/brier_reward/centered_abs_mean": 0.10354410707950593, "signal/brier_reward/group_std_mean": 0.13237534910440446, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01294301338493824, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01294301338493824, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024483132362365722, "signal/confidence_uniqueness_reward/group_std_mean": 0.031796820089221, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030603915452957152, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030603915452957152, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002486141538247466, "signal/frontier_aurc_reward/group_std_mean": 0.004122919822111726, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4501933734864e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4501933734864e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1334092989563942, "signal/frontier_coverage_1/group_std_mean": 0.17288561463356017, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00238802635576576, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00238802635576576, "signal/frontier_coverage_10/centered_abs_mean": 0.1334092989563942, "signal/frontier_coverage_10/group_std_mean": 0.17288561463356017, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00238802635576576, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00238802635576576, "signal/frontier_coverage_15/centered_abs_mean": 0.1263820171356201, "signal/frontier_coverage_15/group_std_mean": 0.16385815739631654, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022622381802648306, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022622381802648306, "signal/frontier_coverage_20/centered_abs_mean": 0.08821254223585129, "signal/frontier_coverage_20/group_std_mean": 0.11483215242624283, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015790044097229838, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015790044097229838, "signal/frontier_coverage_25/centered_abs_mean": 0.06242813915014267, "signal/frontier_coverage_25/group_std_mean": 0.08045191913843155, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001117463654372841, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001117463654372841, "signal/frontier_coverage_5/centered_abs_mean": 0.1334092989563942, "signal/frontier_coverage_5/group_std_mean": 0.17288561463356017, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00238802635576576, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00238802635576576, "signal/frontier_ece_reward/centered_abs_mean": 0.00461451355367899, "signal/frontier_ece_reward/group_std_mean": 0.005895448848605156, "signal/frontier_ece_reward/group_zero_std_frac": 0.025, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005768141942098737, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005768141942098737, "step": 310 }, { "calibration/aurc": 0.27146581736436004, "calibration/batch_distribution_entropy": 0.8746381090032178, "calibration/buffer_distribution_entropy": 0.9301393590851372, "calibration/confidence_entropy": 0.3797766950336877, "calibration/coverage@0%": 0.0107421875, "calibration/coverage@1%": 0.0107421875, "calibration/coverage@10%": 0.1044921875, "calibration/coverage@15%": 0.12890625, "calibration/coverage@20%": 0.171875, "calibration/coverage@25%": 0.4501953125, "calibration/coverage@30%": 0.67578125, "calibration/coverage@5%": 0.0693359375, "calibration/ece": 0.17222164030985487, "calibration/mean_confidence": 0.6362255465598549, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 524.5, "completions/max_terminated_length": 524.5, "completions/mean_length": 244.13977813720703, "completions/mean_terminated_length": 244.13977813720703, "completions/min_length": 123.5, "completions/min_terminated_length": 123.5, "epoch": 0.9984, "num_tokens": 1053636222.0, "reward": 1.0224891901016235, "reward_std": 0.06762239336967468, "rewards/accuracy_reward": 0.590087890625, "rewards/brier_reward": 0.7884363532066345, "rewards/confidence_uniqueness_reward": 0.9537101686000824, "rewards/format_reward": 0.999755859375, "rewards/frontier_aurc_reward": -0.002803257084451616, "rewards/frontier_coverage_1": 0.10104693099856377, "rewards/frontier_coverage_10": 0.10104693099856377, "rewards/frontier_coverage_15": 0.09339457005262375, "rewards/frontier_coverage_20": 0.06919080764055252, "rewards/frontier_coverage_25": 0.054883923381567, "rewards/frontier_coverage_5": 0.10104693099856377, "rewards/frontier_ece_reward": 0.004242375260218978, "signal/accuracy_reward/centered_abs_mean": 0.0766448974609375, "signal/accuracy_reward/group_std_mean": 0.10971884056925774, "signal/accuracy_reward/group_zero_std_frac": 0.6484375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03832244873046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03832244873046875, "signal/advantage_abs_mean": 0.04935746267437935, "signal/advantage_pre_scale_abs_mean": 0.04935746267437935, "signal/advantage_pre_scale_std": 0.0978938564658165, "signal/advantage_std": 0.0978938564658165, "signal/brier_reward/centered_abs_mean": 0.10980122536420822, "signal/brier_reward/group_std_mean": 0.14020781219005585, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013725153170526028, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013725153170526028, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0197527464479208, "signal/confidence_uniqueness_reward/group_std_mean": 0.026468923315405846, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024690933059901, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024690933059901, "signal/format_reward/centered_abs_mean": 0.0004730224609375, "signal/format_reward/group_std_mean": 0.0013810679083690047, "signal/format_reward/group_zero_std_frac": 0.9921875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00023651123046875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00023651123046875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023492295295000076, "signal/frontier_aurc_reward/group_std_mean": 0.003797045210376382, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.205120785627514e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.205120785627514e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1279282197356224, "signal/frontier_coverage_1/group_std_mean": 0.16734013706445694, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022899151081219316, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022899151081219316, "signal/frontier_coverage_10/centered_abs_mean": 0.1279282197356224, "signal/frontier_coverage_10/group_std_mean": 0.16734013706445694, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022899151081219316, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022899151081219316, "signal/frontier_coverage_15/centered_abs_mean": 0.1206963062286377, "signal/frontier_coverage_15/group_std_mean": 0.15781164169311523, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021604637149721384, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021604637149721384, "signal/frontier_coverage_20/centered_abs_mean": 0.08331097289919853, "signal/frontier_coverage_20/group_std_mean": 0.10951121896505356, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014912663027644157, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014912663027644157, "signal/frontier_coverage_25/centered_abs_mean": 0.057270800694823265, "signal/frontier_coverage_25/group_std_mean": 0.07496540248394012, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001025147212203592, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001025147212203592, "signal/frontier_coverage_5/centered_abs_mean": 0.1279282197356224, "signal/frontier_coverage_5/group_std_mean": 0.16734013706445694, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022899151081219316, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022899151081219316, "signal/frontier_ece_reward/centered_abs_mean": 0.004676059354096651, "signal/frontier_ece_reward/group_std_mean": 0.006006488110870123, "signal/frontier_ece_reward/group_zero_std_frac": 0.03125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005845074192620814, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005845074192620814, "step": 312, "total_flos": 0.0, "train_loss": 0.004934354589493477, "train_runtime": 58560.1613, "train_samples_per_second": 0.342, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1053636222, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }