Files
RLCR-v4-ks-uniqueness-buf5k…/trainer_state.json
ModelHub XC 4912ecd619 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-uniqueness-buf5k-hotpot
Source: Original Platform
2026-04-12 05:37:57 +08:00

8568 lines
523 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 50,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.6520791014315168,
"calibration/batch_distribution_entropy": 0.6520500556130477,
"calibration/buffer_distribution_entropy": 0.6327125582954866,
"calibration/confidence_entropy": 0.350359581714784,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5029794396857591,
"calibration/mean_confidence": 0.7884929763595387,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03837890625,
"completions/max_length": 1496.0,
"completions/max_terminated_length": 1496.0,
"completions/mean_length": 214.4638671875,
"completions/mean_terminated_length": 223.0337371826172,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.016,
"grad_norm": 0.03049488551914692,
"learning_rate": 3.1249999999999997e-07,
"loss": 0.0128,
"num_tokens": 17040142.0,
"reward": 0.5860571265220642,
"reward_std": 0.43425698280334474,
"rewards/accuracy_reward": 0.2166015625,
"rewards/brier_reward": 0.3724069595336914,
"rewards/confidence_uniqueness_reward": 0.4859975099563599,
"rewards/format_reward": 0.6830078125,
"rewards/frontier_aurc_reward": 0.11845082510262728,
"rewards/frontier_coverage_1": 0.15014827474951745,
"rewards/frontier_coverage_10": 0.15014827474951745,
"rewards/frontier_coverage_15": 0.15014827474951745,
"rewards/frontier_coverage_20": 0.15014827474951745,
"rewards/frontier_coverage_25": 0.15014827474951745,
"rewards/frontier_coverage_5": 0.15014827474951745,
"rewards/frontier_ece_reward": 0.08564566820859909,
"signal/accuracy_reward/centered_abs_mean": 0.236376953125,
"signal/accuracy_reward/group_std_mean": 0.2783313035964966,
"signal/accuracy_reward/group_zero_std_frac": 0.328125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1181884765625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1181884765625,
"signal/advantage_abs_mean": 0.3693191409111023,
"signal/advantage_pre_scale_abs_mean": 0.3693191409111023,
"signal/advantage_pre_scale_std": 0.4428039789199829,
"signal/advantage_std": 0.4428039789199829,
"signal/brier_reward/centered_abs_mean": 0.3175159811973572,
"signal/brier_reward/group_std_mean": 0.36290293335914614,
"signal/brier_reward/group_zero_std_frac": 0.003125,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03968949764966965,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.03968949764966965,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2981721043586731,
"signal/confidence_uniqueness_reward/group_std_mean": 0.3485286235809326,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.037271513044834136,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.037271513044834136,
"signal/format_reward/centered_abs_mean": 0.401025390625,
"signal/format_reward/group_std_mean": 0.4520082473754883,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2005126953125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.2005126953125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.12194366091862321,
"signal/frontier_aurc_reward/group_std_mean": 0.14532124884426595,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0021827913296874613,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0021827913296874613,
"signal/frontier_coverage_1/centered_abs_mean": 0.15990545451641083,
"signal/frontier_coverage_1/group_std_mean": 0.20984587371349334,
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00286230742931366,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00286230742931366,
"signal/frontier_coverage_10/centered_abs_mean": 0.15990545451641083,
"signal/frontier_coverage_10/group_std_mean": 0.20984587371349334,
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00286230742931366,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00286230742931366,
"signal/frontier_coverage_15/centered_abs_mean": 0.15990545451641083,
"signal/frontier_coverage_15/group_std_mean": 0.20984587371349334,
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00286230742931366,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00286230742931366,
"signal/frontier_coverage_20/centered_abs_mean": 0.15990545451641083,
"signal/frontier_coverage_20/group_std_mean": 0.20984587371349334,
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00286230742931366,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00286230742931366,
"signal/frontier_coverage_25/centered_abs_mean": 0.15990545451641083,
"signal/frontier_coverage_25/group_std_mean": 0.20984587371349334,
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00286230742931366,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00286230742931366,
"signal/frontier_coverage_5/centered_abs_mean": 0.15990545451641083,
"signal/frontier_coverage_5/group_std_mean": 0.20984587371349334,
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00286230742931366,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00286230742931366,
"signal/frontier_ece_reward/centered_abs_mean": 0.20278680622577666,
"signal/frontier_ece_reward/group_std_mean": 0.23787854015827178,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.025348350778222083,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.025348350778222083,
"step": 5
},
{
"calibration/aurc": 0.686738104669059,
"calibration/batch_distribution_entropy": 0.6544058337284724,
"calibration/buffer_distribution_entropy": 0.659068819943599,
"calibration/confidence_entropy": 0.34263287247108665,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5280512691315363,
"calibration/mean_confidence": 0.7892795020774962,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03984375,
"completions/max_length": 1517.2,
"completions/max_terminated_length": 1517.2,
"completions/mean_length": 205.0271484375,
"completions/mean_terminated_length": 213.58506469726564,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.032,
"grad_norm": 0.02306351251900196,
"learning_rate": 6.249999999999999e-07,
"loss": 0.0055,
"num_tokens": 34239972.0,
"reward": 0.5702811002731323,
"reward_std": 0.39164897203445437,
"rewards/accuracy_reward": 0.20810546875,
"rewards/brier_reward": 0.37900232076644896,
"rewards/confidence_uniqueness_reward": 0.5172426342964173,
"rewards/format_reward": 0.71611328125,
"rewards/frontier_aurc_reward": -0.007401939295232296,
"rewards/frontier_coverage_1": 0.04787743985652924,
"rewards/frontier_coverage_10": 0.04787743985652924,
"rewards/frontier_coverage_15": 0.04787743985652924,
"rewards/frontier_coverage_20": 0.04787743985652924,
"rewards/frontier_coverage_25": 0.04787743985652924,
"rewards/frontier_coverage_5": 0.04787743985652924,
"rewards/frontier_ece_reward": -0.07094736471772194,
"signal/accuracy_reward/centered_abs_mean": 0.221295166015625,
"signal/accuracy_reward/group_std_mean": 0.26698018312454225,
"signal/accuracy_reward/group_zero_std_frac": 0.325,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1106475830078125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1106475830078125,
"signal/advantage_abs_mean": 0.3250237703323364,
"signal/advantage_pre_scale_abs_mean": 0.3250237703323364,
"signal/advantage_pre_scale_std": 0.39961376786231995,
"signal/advantage_std": 0.39961376786231995,
"signal/brier_reward/centered_abs_mean": 0.30669012665748596,
"signal/brier_reward/group_std_mean": 0.35525786876678467,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.038336265832185745,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.038336265832185745,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.28253401517868043,
"signal/confidence_uniqueness_reward/group_std_mean": 0.3407617449760437,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.035316751897335054,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.035316751897335054,
"signal/format_reward/centered_abs_mean": 0.376849365234375,
"signal/format_reward/group_std_mean": 0.43812766671180725,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1884246826171875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.1884246826171875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.007204313110560179,
"signal/frontier_aurc_reward/group_std_mean": 0.010008147358894348,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00012895720137748868,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00012895720137748868,
"signal/frontier_coverage_1/centered_abs_mean": 0.07800407558679581,
"signal/frontier_coverage_1/group_std_mean": 0.13180075734853744,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013962728437036276,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013962728437036276,
"signal/frontier_coverage_10/centered_abs_mean": 0.07800407558679581,
"signal/frontier_coverage_10/group_std_mean": 0.13180075734853744,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013962728437036276,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013962728437036276,
"signal/frontier_coverage_15/centered_abs_mean": 0.07800407558679581,
"signal/frontier_coverage_15/group_std_mean": 0.13180075734853744,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013962728437036276,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013962728437036276,
"signal/frontier_coverage_20/centered_abs_mean": 0.07800407558679581,
"signal/frontier_coverage_20/group_std_mean": 0.13180075734853744,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013962728437036276,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013962728437036276,
"signal/frontier_coverage_25/centered_abs_mean": 0.07800407558679581,
"signal/frontier_coverage_25/group_std_mean": 0.13180075734853744,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013962728437036276,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013962728437036276,
"signal/frontier_coverage_5/centered_abs_mean": 0.07800407558679581,
"signal/frontier_coverage_5/group_std_mean": 0.13180075734853744,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013962728437036276,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013962728437036276,
"signal/frontier_ece_reward/centered_abs_mean": 0.15404822826385497,
"signal/frontier_ece_reward/group_std_mean": 0.1783599078655243,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.019256028532981872,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.019256028532981872,
"step": 10
},
{
"calibration/aurc": 0.6088888477468031,
"calibration/batch_distribution_entropy": 0.6341681322259153,
"calibration/buffer_distribution_entropy": 0.6608073296107667,
"calibration/confidence_entropy": 0.33731174701551137,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.48281426176192976,
"calibration/mean_confidence": 0.8022067569106547,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1483.8,
"completions/max_terminated_length": 1483.8,
"completions/mean_length": 174.7013671875,
"completions/mean_terminated_length": 178.33230590820312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.8,
"epoch": 0.048,
"grad_norm": 0.024171145632863045,
"learning_rate": 9.374999999999999e-07,
"loss": 0.0028,
"num_tokens": 51077650.0,
"reward": 0.712907898426056,
"reward_std": 0.3011354684829712,
"rewards/accuracy_reward": 0.2693359375,
"rewards/brier_reward": 0.48231119513511655,
"rewards/confidence_uniqueness_reward": 0.6418869256973266,
"rewards/format_reward": 0.88173828125,
"rewards/frontier_aurc_reward": -0.007521647773683071,
"rewards/frontier_coverage_1": 0.06328226923942566,
"rewards/frontier_coverage_10": 0.06328226923942566,
"rewards/frontier_coverage_15": 0.06328226923942566,
"rewards/frontier_coverage_20": 0.06328226923942566,
"rewards/frontier_coverage_25": 0.06328226923942566,
"rewards/frontier_coverage_5": 0.06328226923942566,
"rewards/frontier_ece_reward": -0.07852677553892136,
"signal/accuracy_reward/centered_abs_mean": 0.19903564453125,
"signal/accuracy_reward/group_std_mean": 0.24764367938041687,
"signal/accuracy_reward/group_zero_std_frac": 0.353125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.099517822265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.099517822265625,
"signal/advantage_abs_mean": 0.22455401420593263,
"signal/advantage_pre_scale_abs_mean": 0.22455401420593263,
"signal/advantage_pre_scale_std": 0.31378203332424165,
"signal/advantage_std": 0.31378203332424165,
"signal/brier_reward/centered_abs_mean": 0.2732009679079056,
"signal/brier_reward/group_std_mean": 0.32669638395309447,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0341501209884882,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0341501209884882,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.19731962382793428,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2597633212804794,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024664952978491785,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.024664952978491785,
"signal/format_reward/centered_abs_mean": 0.191900634765625,
"signal/format_reward/group_std_mean": 0.28800458312034605,
"signal/format_reward/group_zero_std_frac": 0.096875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0959503173828125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0959503173828125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.006012374442070723,
"signal/frontier_aurc_reward/group_std_mean": 0.0084139633923769,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00010762150050140918,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00010762150050140918,
"signal/frontier_coverage_1/centered_abs_mean": 0.09483139663934707,
"signal/frontier_coverage_1/group_std_mean": 0.15267471969127655,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001697481912560761,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001697481912560761,
"signal/frontier_coverage_10/centered_abs_mean": 0.09483139663934707,
"signal/frontier_coverage_10/group_std_mean": 0.15267471969127655,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001697481912560761,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001697481912560761,
"signal/frontier_coverage_15/centered_abs_mean": 0.09483139663934707,
"signal/frontier_coverage_15/group_std_mean": 0.15267471969127655,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001697481912560761,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001697481912560761,
"signal/frontier_coverage_20/centered_abs_mean": 0.09483139663934707,
"signal/frontier_coverage_20/group_std_mean": 0.15267471969127655,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001697481912560761,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001697481912560761,
"signal/frontier_coverage_25/centered_abs_mean": 0.09483139663934707,
"signal/frontier_coverage_25/group_std_mean": 0.15267471969127655,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001697481912560761,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001697481912560761,
"signal/frontier_coverage_5/centered_abs_mean": 0.09483139663934707,
"signal/frontier_coverage_5/group_std_mean": 0.15267471969127655,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001697481912560761,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001697481912560761,
"signal/frontier_ece_reward/centered_abs_mean": 0.14207346588373185,
"signal/frontier_ece_reward/group_std_mean": 0.1723174571990967,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.01775918323546648,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.01775918323546648,
"step": 15
},
{
"calibration/aurc": 0.5364292201940342,
"calibration/batch_distribution_entropy": 0.6899606180644569,
"calibration/buffer_distribution_entropy": 0.6629842838989687,
"calibration/confidence_entropy": 0.3598924957744884,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.3788861635427105,
"calibration/mean_confidence": 0.78362823091762,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00361328125,
"completions/max_length": 1318.6,
"completions/max_terminated_length": 1318.6,
"completions/mean_length": 131.579296875,
"completions/mean_terminated_length": 132.06165008544923,
"completions/min_length": 0.0,
"completions/min_terminated_length": 39.8,
"epoch": 0.064,
"grad_norm": 0.007380437571555376,
"learning_rate": 1e-06,
"loss": -0.0008,
"num_tokens": 67343422.0,
"reward": 0.8247708439826965,
"reward_std": 0.20879798531532287,
"rewards/accuracy_reward": 0.341796875,
"rewards/brier_reward": 0.5677559733390808,
"rewards/confidence_uniqueness_reward": 0.7422728180885315,
"rewards/format_reward": 0.98212890625,
"rewards/frontier_aurc_reward": -0.007509990967810154,
"rewards/frontier_coverage_1": 0.061825338006019595,
"rewards/frontier_coverage_10": 0.061825338006019595,
"rewards/frontier_coverage_15": 0.061825338006019595,
"rewards/frontier_coverage_20": 0.061825338006019595,
"rewards/frontier_coverage_25": 0.061825338006019595,
"rewards/frontier_coverage_5": 0.061825338006019595,
"rewards/frontier_ece_reward": -0.05961005799472332,
"signal/accuracy_reward/centered_abs_mean": 0.19915771484375,
"signal/accuracy_reward/group_std_mean": 0.24949793219566346,
"signal/accuracy_reward/group_zero_std_frac": 0.346875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.099578857421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.099578857421875,
"signal/advantage_abs_mean": 0.15828121602535247,
"signal/advantage_pre_scale_abs_mean": 0.15828121602535247,
"signal/advantage_pre_scale_std": 0.2283491849899292,
"signal/advantage_std": 0.2283491849899292,
"signal/brier_reward/centered_abs_mean": 0.24234021306037903,
"signal/brier_reward/group_std_mean": 0.29873130321502683,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03029252663254738,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.03029252663254738,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1241894617676735,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1588551729917526,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015523682720959187,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015523682720959187,
"signal/format_reward/centered_abs_mean": 0.033868408203125,
"signal/format_reward/group_std_mean": 0.08356819599866867,
"signal/format_reward/group_zero_std_frac": 0.584375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0169342041015625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0169342041015625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.005343654099851847,
"signal/frontier_aurc_reward/group_std_mean": 0.007241041865199804,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.565140499034896e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.565140499034896e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.10414516925811768,
"signal/frontier_coverage_1/group_std_mean": 0.16521920561790465,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001864198502153158,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001864198502153158,
"signal/frontier_coverage_10/centered_abs_mean": 0.10414516925811768,
"signal/frontier_coverage_10/group_std_mean": 0.16521920561790465,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001864198502153158,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001864198502153158,
"signal/frontier_coverage_15/centered_abs_mean": 0.10414516925811768,
"signal/frontier_coverage_15/group_std_mean": 0.16521920561790465,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001864198502153158,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001864198502153158,
"signal/frontier_coverage_20/centered_abs_mean": 0.10414516925811768,
"signal/frontier_coverage_20/group_std_mean": 0.16521920561790465,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001864198502153158,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001864198502153158,
"signal/frontier_coverage_25/centered_abs_mean": 0.10414516925811768,
"signal/frontier_coverage_25/group_std_mean": 0.16521920561790465,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001864198502153158,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001864198502153158,
"signal/frontier_coverage_5/centered_abs_mean": 0.10414516925811768,
"signal/frontier_coverage_5/group_std_mean": 0.16521920561790465,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001864198502153158,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001864198502153158,
"signal/frontier_ece_reward/centered_abs_mean": 0.1342237263917923,
"signal/frontier_ece_reward/group_std_mean": 0.1653001368045807,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.01677796579897404,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.01677796579897404,
"step": 20
},
{
"calibration/aurc": 0.6140042512055071,
"calibration/batch_distribution_entropy": 0.7993870619632342,
"calibration/buffer_distribution_entropy": 0.7065876031482226,
"calibration/confidence_entropy": 0.44797268663899076,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.401363358826722,
"calibration/mean_confidence": 0.7178722624377836,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00107421875,
"completions/max_length": 578.0,
"completions/max_terminated_length": 578.0,
"completions/mean_length": 111.48779296875,
"completions/mean_terminated_length": 111.6080093383789,
"completions/min_length": 0.0,
"completions/min_terminated_length": 34.4,
"epoch": 0.08,
"grad_norm": 0.01710013672709465,
"learning_rate": 1e-06,
"loss": -0.0016,
"num_tokens": 83418209.0,
"reward": 0.8598999977111816,
"reward_std": 0.17347855865955353,
"rewards/accuracy_reward": 0.3623046875,
"rewards/brier_reward": 0.6238975405693055,
"rewards/confidence_uniqueness_reward": 0.8124577879905701,
"rewards/format_reward": 0.99453125,
"rewards/frontier_aurc_reward": -0.00675971582531929,
"rewards/frontier_coverage_1": 0.06414978951215744,
"rewards/frontier_coverage_10": 0.06414978951215744,
"rewards/frontier_coverage_15": 0.06414978951215744,
"rewards/frontier_coverage_20": 0.06414978951215744,
"rewards/frontier_coverage_25": 0.06414978951215744,
"rewards/frontier_coverage_5": 0.06414978951215744,
"rewards/frontier_ece_reward": -0.038648569211363795,
"signal/accuracy_reward/centered_abs_mean": 0.1884521484375,
"signal/accuracy_reward/group_std_mean": 0.23765672743320465,
"signal/accuracy_reward/group_zero_std_frac": 0.35625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09422607421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09422607421875,
"signal/advantage_abs_mean": 0.13513298332691193,
"signal/advantage_pre_scale_abs_mean": 0.13513298332691193,
"signal/advantage_pre_scale_std": 0.19442801177501678,
"signal/advantage_std": 0.19442801177501678,
"signal/brier_reward/centered_abs_mean": 0.2162698209285736,
"signal/brier_reward/group_std_mean": 0.2669309198856354,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0270337276160717,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0270337276160717,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07793950736522674,
"signal/confidence_uniqueness_reward/group_std_mean": 0.10388771593570709,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009742438420653343,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009742438420653343,
"signal/format_reward/centered_abs_mean": 0.010498046875,
"signal/format_reward/group_std_mean": 0.02852254919707775,
"signal/format_reward/group_zero_std_frac": 0.846875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0052490234375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0052490234375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.005007585696876049,
"signal/frontier_aurc_reward/group_std_mean": 0.007518738508224487,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.963578147813678e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.963578147813678e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12791292518377304,
"signal/frontier_coverage_1/group_std_mean": 0.19102973639965057,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002289641345851123,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002289641345851123,
"signal/frontier_coverage_10/centered_abs_mean": 0.12791292518377304,
"signal/frontier_coverage_10/group_std_mean": 0.19102973639965057,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002289641345851123,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002289641345851123,
"signal/frontier_coverage_15/centered_abs_mean": 0.12791292518377304,
"signal/frontier_coverage_15/group_std_mean": 0.19102973639965057,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002289641345851123,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002289641345851123,
"signal/frontier_coverage_20/centered_abs_mean": 0.12791292518377304,
"signal/frontier_coverage_20/group_std_mean": 0.19102973639965057,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002289641345851123,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002289641345851123,
"signal/frontier_coverage_25/centered_abs_mean": 0.12791292518377304,
"signal/frontier_coverage_25/group_std_mean": 0.19102973639965057,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002289641345851123,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002289641345851123,
"signal/frontier_coverage_5/centered_abs_mean": 0.12791292518377304,
"signal/frontier_coverage_5/group_std_mean": 0.19102973639965057,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002289641345851123,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002289641345851123,
"signal/frontier_ece_reward/centered_abs_mean": 0.10605086386203766,
"signal/frontier_ece_reward/group_std_mean": 0.130093914270401,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013256357982754707,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013256357982754707,
"step": 25
},
{
"calibration/aurc": 0.6363392412690098,
"calibration/batch_distribution_entropy": 0.8753942580216417,
"calibration/buffer_distribution_entropy": 0.8046044264239305,
"calibration/confidence_entropy": 0.536024070863581,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.3191686730442882,
"calibration/mean_confidence": 0.6114342021814927,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 592.4,
"completions/max_terminated_length": 592.4,
"completions/mean_length": 109.39501953125,
"completions/mean_terminated_length": 109.44864196777344,
"completions/min_length": 6.8,
"completions/min_terminated_length": 40.4,
"epoch": 0.096,
"grad_norm": 0.002386895241215825,
"learning_rate": 1e-06,
"loss": -0.0012,
"num_tokens": 99583022.0,
"reward": 0.8758266925811767,
"reward_std": 0.1546397477388382,
"rewards/accuracy_reward": 0.365625,
"rewards/brier_reward": 0.6748013496398926,
"rewards/confidence_uniqueness_reward": 0.8361028790473938,
"rewards/format_reward": 0.99677734375,
"rewards/frontier_aurc_reward": -0.0066743393428623675,
"rewards/frontier_coverage_1": 0.08200703710317611,
"rewards/frontier_coverage_10": 0.08200703710317611,
"rewards/frontier_coverage_15": 0.08200703710317611,
"rewards/frontier_coverage_20": 0.08200703710317611,
"rewards/frontier_coverage_25": 0.08200703710317611,
"rewards/frontier_coverage_5": 0.08200703710317611,
"rewards/frontier_ece_reward": -0.023404643405228853,
"signal/accuracy_reward/centered_abs_mean": 0.1758544921875,
"signal/accuracy_reward/group_std_mean": 0.22659938931465148,
"signal/accuracy_reward/group_zero_std_frac": 0.371875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08792724609375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08792724609375,
"signal/advantage_abs_mean": 0.11989345848560333,
"signal/advantage_pre_scale_abs_mean": 0.11989345848560333,
"signal/advantage_pre_scale_std": 0.17157818973064423,
"signal/advantage_std": 0.17157818973064423,
"signal/brier_reward/centered_abs_mean": 0.19934343099594115,
"signal/brier_reward/group_std_mean": 0.24739857614040375,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024917928874492644,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.024917928874492644,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08760613948106766,
"signal/confidence_uniqueness_reward/group_std_mean": 0.11381718665361404,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010950767435133457,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010950767435133457,
"signal/format_reward/centered_abs_mean": 0.006219482421875,
"signal/format_reward/group_std_mean": 0.017557479441165924,
"signal/format_reward/group_zero_std_frac": 0.903125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0031097412109375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0031097412109375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004683180712163448,
"signal/frontier_aurc_reward/group_std_mean": 0.007812988758087159,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.382893720408902e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.382893720408902e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17279436886310579,
"signal/frontier_coverage_1/group_std_mean": 0.2367533653974533,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003093018988147378,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003093018988147378,
"signal/frontier_coverage_10/centered_abs_mean": 0.17279436886310579,
"signal/frontier_coverage_10/group_std_mean": 0.2367533653974533,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003093018988147378,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003093018988147378,
"signal/frontier_coverage_15/centered_abs_mean": 0.17279436886310579,
"signal/frontier_coverage_15/group_std_mean": 0.2367533653974533,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003093018988147378,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003093018988147378,
"signal/frontier_coverage_20/centered_abs_mean": 0.17279436886310579,
"signal/frontier_coverage_20/group_std_mean": 0.2367533653974533,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003093018988147378,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003093018988147378,
"signal/frontier_coverage_25/centered_abs_mean": 0.17279436886310579,
"signal/frontier_coverage_25/group_std_mean": 0.2367533653974533,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003093018988147378,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003093018988147378,
"signal/frontier_coverage_5/centered_abs_mean": 0.17279436886310579,
"signal/frontier_coverage_5/group_std_mean": 0.2367533653974533,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003093018988147378,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003093018988147378,
"signal/frontier_ece_reward/centered_abs_mean": 0.08607159703969955,
"signal/frontier_ece_reward/group_std_mean": 0.10561716556549072,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.010758949629962444,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.010758949629962444,
"step": 30
},
{
"calibration/aurc": 0.489450247135421,
"calibration/batch_distribution_entropy": 0.9097859047670923,
"calibration/buffer_distribution_entropy": 0.886473587522856,
"calibration/confidence_entropy": 0.5542702844742003,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.01761252446183953,
"calibration/coverage@25%": 0.028180039138943246,
"calibration/coverage@30%": 0.048532289628180035,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.15793039991187768,
"calibration/mean_confidence": 0.5090407187050581,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 732.4,
"completions/max_terminated_length": 732.4,
"completions/mean_length": 115.28974609375,
"completions/mean_terminated_length": 115.31214294433593,
"completions/min_length": 22.6,
"completions/min_terminated_length": 40.2,
"epoch": 0.112,
"grad_norm": 0.002525592688471079,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 115873061.0,
"reward": 0.908475935459137,
"reward_std": 0.13711616396903992,
"rewards/accuracy_reward": 0.40126953125,
"rewards/brier_reward": 0.7265522956848145,
"rewards/confidence_uniqueness_reward": 0.86098210811615,
"rewards/format_reward": 0.99755859375,
"rewards/frontier_aurc_reward": -0.005971485190093518,
"rewards/frontier_coverage_1": 0.10243775844573974,
"rewards/frontier_coverage_10": 0.10243775844573974,
"rewards/frontier_coverage_15": 0.10243775844573974,
"rewards/frontier_coverage_20": 0.10243775844573974,
"rewards/frontier_coverage_25": 0.10243775844573974,
"rewards/frontier_coverage_5": 0.10243775844573974,
"rewards/frontier_ece_reward": -0.0021987749729305505,
"signal/accuracy_reward/centered_abs_mean": 0.174346923828125,
"signal/accuracy_reward/group_std_mean": 0.22295112013816834,
"signal/accuracy_reward/group_zero_std_frac": 0.390625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0871734619140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0871734619140625,
"signal/advantage_abs_mean": 0.1071009561419487,
"signal/advantage_pre_scale_abs_mean": 0.1071009561419487,
"signal/advantage_pre_scale_std": 0.15131535828113557,
"signal/advantage_std": 0.15131535828113557,
"signal/brier_reward/centered_abs_mean": 0.1822745144367218,
"signal/brier_reward/group_std_mean": 0.23006103336811065,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022784314304590225,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.022784314304590225,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08360175788402557,
"signal/confidence_uniqueness_reward/group_std_mean": 0.10827527344226837,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010450219735503197,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010450219735503197,
"signal/format_reward/centered_abs_mean": 0.004730224609375,
"signal/format_reward/group_std_mean": 0.013810678757727146,
"signal/format_reward/group_zero_std_frac": 0.921875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0023651123046875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0023651123046875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004599428828805685,
"signal/frontier_aurc_reward/group_std_mean": 0.008067583758383989,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.232977124862373e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.232977124862373e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2317400634288788,
"signal/frontier_coverage_1/group_std_mean": 0.2976967990398407,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004148146975785494,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004148146975785494,
"signal/frontier_coverage_10/centered_abs_mean": 0.2317400634288788,
"signal/frontier_coverage_10/group_std_mean": 0.2976967990398407,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004148146975785494,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004148146975785494,
"signal/frontier_coverage_15/centered_abs_mean": 0.2317400634288788,
"signal/frontier_coverage_15/group_std_mean": 0.2976967990398407,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004148146975785494,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004148146975785494,
"signal/frontier_coverage_20/centered_abs_mean": 0.2317400634288788,
"signal/frontier_coverage_20/group_std_mean": 0.2976967990398407,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004148146975785494,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004148146975785494,
"signal/frontier_coverage_25/centered_abs_mean": 0.2317400634288788,
"signal/frontier_coverage_25/group_std_mean": 0.2976967990398407,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004148146975785494,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004148146975785494,
"signal/frontier_coverage_5/centered_abs_mean": 0.2317400634288788,
"signal/frontier_coverage_5/group_std_mean": 0.2976967990398407,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004148146975785494,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004148146975785494,
"signal/frontier_ece_reward/centered_abs_mean": 0.05894382745027542,
"signal/frontier_ece_reward/group_std_mean": 0.07574775069952011,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007367978431284427,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007367978431284427,
"step": 35
},
{
"calibration/aurc": 0.5206989576745183,
"calibration/batch_distribution_entropy": 0.8672090816408395,
"calibration/buffer_distribution_entropy": 0.9258209629086338,
"calibration/confidence_entropy": 0.5502407861991806,
"calibration/coverage@0%": 0.000390625,
"calibration/coverage@1%": 0.000390625,
"calibration/coverage@10%": 0.000390625,
"calibration/coverage@15%": 0.000390625,
"calibration/coverage@20%": 0.012915086839530332,
"calibration/coverage@25%": 0.022699822651663405,
"calibration/coverage@30%": 0.05239802470645792,
"calibration/coverage@5%": 0.000390625,
"calibration/ece": 0.14915996740844817,
"calibration/mean_confidence": 0.3689266180239892,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 532.0,
"completions/max_terminated_length": 532.0,
"completions/mean_length": 126.098046875,
"completions/mean_terminated_length": 126.17163696289063,
"completions/min_length": 0.0,
"completions/min_terminated_length": 49.0,
"epoch": 0.128,
"grad_norm": 0.0011667232029139996,
"learning_rate": 1e-06,
"loss": -0.0011,
"num_tokens": 132080977.0,
"reward": 0.9204171657562256,
"reward_std": 0.10757582783699035,
"rewards/accuracy_reward": 0.4150390625,
"rewards/brier_reward": 0.740708875656128,
"rewards/confidence_uniqueness_reward": 0.8622852087020874,
"rewards/format_reward": 0.99853515625,
"rewards/frontier_aurc_reward": -0.005713082384318113,
"rewards/frontier_coverage_1": 0.11871729344129563,
"rewards/frontier_coverage_10": 0.11871729344129563,
"rewards/frontier_coverage_15": 0.11871729344129563,
"rewards/frontier_coverage_20": 0.11871729344129563,
"rewards/frontier_coverage_25": 0.11871729344129563,
"rewards/frontier_coverage_5": 0.11871729344129563,
"rewards/frontier_ece_reward": 0.004862516885623336,
"signal/accuracy_reward/centered_abs_mean": 0.15028076171875,
"signal/accuracy_reward/group_std_mean": 0.1892428368330002,
"signal/accuracy_reward/group_zero_std_frac": 0.5,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.075140380859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.075140380859375,
"signal/advantage_abs_mean": 0.08380529284477234,
"signal/advantage_pre_scale_abs_mean": 0.08380529284477234,
"signal/advantage_pre_scale_std": 0.12132732272148132,
"signal/advantage_std": 0.12132732272148132,
"signal/brier_reward/centered_abs_mean": 0.16387847065925598,
"signal/brier_reward/group_std_mean": 0.2097865968942642,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020484808832406998,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020484808832406998,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08669483661651611,
"signal/confidence_uniqueness_reward/group_std_mean": 0.11006910055875778,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010836854577064514,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010836854577064514,
"signal/format_reward/centered_abs_mean": 0.002838134765625,
"signal/format_reward/group_std_mean": 0.008286407403647899,
"signal/format_reward/group_zero_std_frac": 0.953125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0014190673828125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0014190673828125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004314260836690664,
"signal/frontier_aurc_reward/group_std_mean": 0.007658246159553528,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.722526643192396e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.722526643192396e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.25970604717731477,
"signal/frontier_coverage_1/group_std_mean": 0.3267000436782837,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0046487381681799885,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0046487381681799885,
"signal/frontier_coverage_10/centered_abs_mean": 0.25970604717731477,
"signal/frontier_coverage_10/group_std_mean": 0.3267000436782837,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0046487381681799885,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0046487381681799885,
"signal/frontier_coverage_15/centered_abs_mean": 0.25970604717731477,
"signal/frontier_coverage_15/group_std_mean": 0.3267000436782837,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0046487381681799885,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0046487381681799885,
"signal/frontier_coverage_20/centered_abs_mean": 0.25970604717731477,
"signal/frontier_coverage_20/group_std_mean": 0.3267000436782837,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0046487381681799885,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0046487381681799885,
"signal/frontier_coverage_25/centered_abs_mean": 0.25970604717731477,
"signal/frontier_coverage_25/group_std_mean": 0.3267000436782837,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0046487381681799885,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0046487381681799885,
"signal/frontier_coverage_5/centered_abs_mean": 0.25970604717731477,
"signal/frontier_coverage_5/group_std_mean": 0.3267000436782837,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0046487381681799885,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0046487381681799885,
"signal/frontier_ece_reward/centered_abs_mean": 0.03048998787999153,
"signal/frontier_ece_reward/group_std_mean": 0.044067969918251036,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0038112484849989413,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0038112484849989413,
"step": 40
},
{
"calibration/aurc": 0.3493427633203418,
"calibration/batch_distribution_entropy": 0.8422584840336123,
"calibration/buffer_distribution_entropy": 0.8986011249508191,
"calibration/confidence_entropy": 0.5100431661510836,
"calibration/coverage@0%": 0.007827788649706457,
"calibration/coverage@1%": 0.007827788649706457,
"calibration/coverage@10%": 0.034823721868884536,
"calibration/coverage@15%": 0.08761390044031311,
"calibration/coverage@20%": 0.11694211717221134,
"calibration/coverage@25%": 0.25469285102739725,
"calibration/coverage@30%": 0.31218887597847356,
"calibration/coverage@5%": 0.00939334637964775,
"calibration/ece": 0.2896374445454886,
"calibration/mean_confidence": 0.3102206767852354,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 360.0,
"completions/max_terminated_length": 360.0,
"completions/mean_length": 132.58232421875,
"completions/mean_terminated_length": 132.58232421875,
"completions/min_length": 48.6,
"completions/min_terminated_length": 48.6,
"epoch": 0.144,
"grad_norm": 0.0015174931613728404,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 148389052.0,
"reward": 0.9515798449516296,
"reward_std": 0.09809240400791168,
"rewards/accuracy_reward": 0.50029296875,
"rewards/brier_reward": 0.7102538347244263,
"rewards/confidence_uniqueness_reward": 0.8570566415786743,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0047408781945705416,
"rewards/frontier_coverage_1": 0.0487132525537163,
"rewards/frontier_coverage_10": 0.0487132525537163,
"rewards/frontier_coverage_15": 0.0487132525537163,
"rewards/frontier_coverage_20": 0.0487132525537163,
"rewards/frontier_coverage_25": 0.0487132525537163,
"rewards/frontier_coverage_5": 0.0487132525537163,
"rewards/frontier_ece_reward": 0.004543364420533181,
"signal/accuracy_reward/centered_abs_mean": 0.153765869140625,
"signal/accuracy_reward/group_std_mean": 0.20359255671501159,
"signal/accuracy_reward/group_zero_std_frac": 0.41875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0768829345703125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0768829345703125,
"signal/advantage_abs_mean": 0.07638602703809738,
"signal/advantage_pre_scale_abs_mean": 0.07638602703809738,
"signal/advantage_pre_scale_std": 0.1084257572889328,
"signal/advantage_std": 0.1084257572889328,
"signal/brier_reward/centered_abs_mean": 0.1644006758928299,
"signal/brier_reward/group_std_mean": 0.20843989551067352,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020550084486603736,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020550084486603736,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09438176602125167,
"signal/confidence_uniqueness_reward/group_std_mean": 0.12099749445915223,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011797720752656459,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011797720752656459,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004868277069181204,
"signal/frontier_aurc_reward/group_std_mean": 0.00864907018840313,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.71421565534547e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.71421565534547e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.27776241302490234,
"signal/frontier_coverage_1/group_std_mean": 0.34930204749107363,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004971946869045496,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004971946869045496,
"signal/frontier_coverage_10/centered_abs_mean": 0.27776241302490234,
"signal/frontier_coverage_10/group_std_mean": 0.34930204749107363,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004971946869045496,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004971946869045496,
"signal/frontier_coverage_15/centered_abs_mean": 0.27776241302490234,
"signal/frontier_coverage_15/group_std_mean": 0.34930204749107363,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004971946869045496,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004971946869045496,
"signal/frontier_coverage_20/centered_abs_mean": 0.27776241302490234,
"signal/frontier_coverage_20/group_std_mean": 0.34930204749107363,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004971946869045496,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004971946869045496,
"signal/frontier_coverage_25/centered_abs_mean": 0.27776241302490234,
"signal/frontier_coverage_25/group_std_mean": 0.34930204749107363,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004971946869045496,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004971946869045496,
"signal/frontier_coverage_5/centered_abs_mean": 0.27776241302490234,
"signal/frontier_coverage_5/group_std_mean": 0.34930204749107363,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004971946869045496,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004971946869045496,
"signal/frontier_ece_reward/centered_abs_mean": 0.025735930353403092,
"signal/frontier_ece_reward/group_std_mean": 0.03516030982136727,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032169912941753865,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032169912941753865,
"step": 45
},
{
"calibration/aurc": 0.429909830503399,
"calibration/batch_distribution_entropy": 0.8331752745278456,
"calibration/buffer_distribution_entropy": 0.8521642786308291,
"calibration/confidence_entropy": 0.4922261092968851,
"calibration/coverage@0%": 0.001953889432485323,
"calibration/coverage@1%": 0.001953889432485323,
"calibration/coverage@10%": 0.02074058219178082,
"calibration/coverage@15%": 0.02894370719178082,
"calibration/coverage@20%": 0.050457895058708416,
"calibration/coverage@25%": 0.0696313906555773,
"calibration/coverage@30%": 0.11065924657534247,
"calibration/coverage@5%": 0.001953889432485323,
"calibration/ece": 0.18033031522065138,
"calibration/mean_confidence": 0.2890452733923623,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 384.8,
"completions/max_terminated_length": 384.8,
"completions/mean_length": 140.3033203125,
"completions/mean_terminated_length": 140.33045959472656,
"completions/min_length": 36.0,
"completions/min_terminated_length": 59.8,
"epoch": 0.16,
"grad_norm": 0.001327816746197641,
"learning_rate": 1e-06,
"loss": -0.0007,
"num_tokens": 164846686.0,
"reward": 0.9375945091247558,
"reward_std": 0.09603821486234665,
"rewards/accuracy_reward": 0.44765625,
"rewards/brier_reward": 0.7351169109344482,
"rewards/confidence_uniqueness_reward": 0.8737802147865296,
"rewards/format_reward": 0.99921875,
"rewards/frontier_aurc_reward": -0.006393326632678509,
"rewards/frontier_coverage_1": 0.11687923520803452,
"rewards/frontier_coverage_10": 0.11687923520803452,
"rewards/frontier_coverage_15": 0.11687923520803452,
"rewards/frontier_coverage_20": 0.10851382911205291,
"rewards/frontier_coverage_25": 0.09542302042245865,
"rewards/frontier_coverage_5": 0.11687923520803452,
"rewards/frontier_ece_reward": 0.009122074954211712,
"signal/accuracy_reward/centered_abs_mean": 0.149560546875,
"signal/accuracy_reward/group_std_mean": 0.19247543215751647,
"signal/accuracy_reward/group_zero_std_frac": 0.46875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0747802734375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0747802734375,
"signal/advantage_abs_mean": 0.07515855580568313,
"signal/advantage_pre_scale_abs_mean": 0.07515855580568313,
"signal/advantage_pre_scale_std": 0.10808514952659606,
"signal/advantage_std": 0.10808514952659606,
"signal/brier_reward/centered_abs_mean": 0.15984438359737396,
"signal/brier_reward/group_std_mean": 0.20464283525943755,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019980547949671745,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.019980547949671745,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08056806325912476,
"signal/confidence_uniqueness_reward/group_std_mean": 0.10358563214540481,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010071007907390595,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010071007907390595,
"signal/format_reward/centered_abs_mean": 0.00150146484375,
"signal/format_reward/group_std_mean": 0.004083108901977539,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000750732421875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000750732421875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.007270704582333564,
"signal/frontier_aurc_reward/group_std_mean": 0.012277528084814549,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00013014561554882675,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00013014561554882675,
"signal/frontier_coverage_1/centered_abs_mean": 0.27668232321739195,
"signal/frontier_coverage_1/group_std_mean": 0.34649158716201783,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004952613543719053,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004952613543719053,
"signal/frontier_coverage_10/centered_abs_mean": 0.27668232321739195,
"signal/frontier_coverage_10/group_std_mean": 0.34649158716201783,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004952613543719053,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004952613543719053,
"signal/frontier_coverage_15/centered_abs_mean": 0.27668232321739195,
"signal/frontier_coverage_15/group_std_mean": 0.34649158716201783,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004952613543719053,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004952613543719053,
"signal/frontier_coverage_20/centered_abs_mean": 0.26259068548679354,
"signal/frontier_coverage_20/group_std_mean": 0.3295305907726288,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004700373206287622,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004700373206287622,
"signal/frontier_coverage_25/centered_abs_mean": 0.2343019276857376,
"signal/frontier_coverage_25/group_std_mean": 0.2939989745616913,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004194004368036986,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004194004368036986,
"signal/frontier_coverage_5/centered_abs_mean": 0.27668232321739195,
"signal/frontier_coverage_5/group_std_mean": 0.34649158716201783,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004952613543719053,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004952613543719053,
"signal/frontier_ece_reward/centered_abs_mean": 0.026442524790763856,
"signal/frontier_ece_reward/group_std_mean": 0.03394659385085106,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003305315598845482,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003305315598845482,
"step": 50
},
{
"epoch": 0.16,
"eval_calibration/aurc": 0.6283905468153964,
"eval_calibration/batch_distribution_entropy": 0.8246020608481337,
"eval_calibration/buffer_distribution_entropy": 0.8438652256203689,
"eval_calibration/confidence_entropy": 0.5230129575921325,
"eval_calibration/coverage@0%": 0.03125,
"eval_calibration/coverage@1%": 0.03125,
"eval_calibration/coverage@10%": 0.03125,
"eval_calibration/coverage@15%": 0.0625,
"eval_calibration/coverage@20%": 0.0625,
"eval_calibration/coverage@25%": 0.0703125,
"eval_calibration/coverage@30%": 0.078125,
"eval_calibration/coverage@5%": 0.03125,
"eval_calibration/ece": 0.20668557314797104,
"eval_calibration/mean_confidence": 0.338716823147971,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 286.75,
"eval_completions/max_terminated_length": 286.75,
"eval_completions/mean_length": 145.7804412841797,
"eval_completions/mean_terminated_length": 145.7804412841797,
"eval_completions/min_length": 72.5,
"eval_completions/min_terminated_length": 72.5,
"eval_loss": 0.0,
"eval_num_tokens": 164846686.0,
"eval_reward": 0.903352826833725,
"eval_reward_std": 0.1771019734442234,
"eval_rewards/accuracy_reward": 0.365234375,
"eval_rewards/brier_reward": 0.7625665217638016,
"eval_rewards/confidence_uniqueness_reward": 0.8388671875,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.00893060932867229,
"eval_rewards/frontier_coverage_1": 0.1894116010516882,
"eval_rewards/frontier_coverage_10": 0.1894116010516882,
"eval_rewards/frontier_coverage_15": 0.1894116010516882,
"eval_rewards/frontier_coverage_20": 0.15809685923159122,
"eval_rewards/frontier_coverage_25": 0.13804291561245918,
"eval_rewards/frontier_coverage_5": 0.1894116010516882,
"eval_rewards/frontier_ece_reward": 0.014828308019787073,
"eval_runtime": 16.7982,
"eval_samples_per_second": 29.765,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4405517578125,
"eval_signal/accuracy_reward/group_std_mean": 0.4753512069582939,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22027587890625,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22027587890625,
"eval_signal/advantage_abs_mean": 0.15582863986492157,
"eval_signal/advantage_pre_scale_abs_mean": 0.15582863986492157,
"eval_signal/advantage_pre_scale_std": 0.17532747611403465,
"eval_signal/advantage_std": 0.17532747611403465,
"eval_signal/brier_reward/centered_abs_mean": 0.20503579452633858,
"eval_signal/brier_reward/group_std_mean": 0.2495810128748417,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025629474315792322,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.025629474315792322,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0798797607421875,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09276183322072029,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009984970092773438,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009984970092773438,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.011487595969811082,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.019020321778953075,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00020562795907608233,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00020562795907608233,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.4566965103149414,
"eval_signal/frontier_coverage_1/group_std_mean": 0.5377178192138672,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.008174867718480527,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.008174867718480527,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.4566965103149414,
"eval_signal/frontier_coverage_10/group_std_mean": 0.5377178192138672,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.008174867718480527,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.008174867718480527,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.4566965103149414,
"eval_signal/frontier_coverage_15/group_std_mean": 0.5377178192138672,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.008174867718480527,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.008174867718480527,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3946557566523552,
"eval_signal/frontier_coverage_20/group_std_mean": 0.4646109938621521,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00706433760933578,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00706433760933578,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.3415372520685196,
"eval_signal/frontier_coverage_25/group_std_mean": 0.4041067510843277,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006113516399636865,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006113516399636865,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.4566965103149414,
"eval_signal/frontier_coverage_5/group_std_mean": 0.5377178192138672,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.008174867718480527,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.008174867718480527,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.03579251281917095,
"eval_signal/frontier_ece_reward/group_std_mean": 0.04359094426035881,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004474064102396369,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004474064102396369,
"eval_steps_per_second": 0.238,
"step": 50
},
{
"calibration/aurc": 0.4299541464994219,
"calibration/batch_distribution_entropy": 0.9117928962628777,
"calibration/buffer_distribution_entropy": 0.8590487288442219,
"calibration/confidence_entropy": 0.5176975718720362,
"calibration/coverage@0%": 0.0015625,
"calibration/coverage@1%": 0.0015625,
"calibration/coverage@10%": 0.0015625,
"calibration/coverage@15%": 0.0015625,
"calibration/coverage@20%": 0.0015625,
"calibration/coverage@25%": 0.00625,
"calibration/coverage@30%": 0.181640625,
"calibration/coverage@5%": 0.0015625,
"calibration/ece": 0.20829897379675658,
"calibration/mean_confidence": 0.35808930745324347,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 423.4,
"completions/max_terminated_length": 423.4,
"completions/mean_length": 149.642578125,
"completions/mean_terminated_length": 149.6872589111328,
"completions/min_length": 36.6,
"completions/min_terminated_length": 62.6,
"epoch": 0.176,
"grad_norm": 0.0011578703997656703,
"learning_rate": 1e-06,
"loss": -0.0004,
"num_tokens": 181616146.0,
"reward": 0.9438661813735962,
"reward_std": 0.09843092411756516,
"rewards/accuracy_reward": 0.4484375,
"rewards/brier_reward": 0.7445629596710205,
"rewards/confidence_uniqueness_reward": 0.9034521102905273,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.007898070383816957,
"rewards/frontier_coverage_1": 0.12487641721963882,
"rewards/frontier_coverage_10": 0.12487641721963882,
"rewards/frontier_coverage_15": 0.12487641721963882,
"rewards/frontier_coverage_20": 0.12381393015384674,
"rewards/frontier_coverage_25": 0.10958583354949951,
"rewards/frontier_coverage_5": 0.12487641721963882,
"rewards/frontier_ece_reward": 0.006905978079885244,
"signal/accuracy_reward/centered_abs_mean": 0.152099609375,
"signal/accuracy_reward/group_std_mean": 0.1922013580799103,
"signal/accuracy_reward/group_zero_std_frac": 0.48125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0760498046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0760498046875,
"signal/advantage_abs_mean": 0.07791332751512528,
"signal/advantage_pre_scale_abs_mean": 0.07791332751512528,
"signal/advantage_pre_scale_std": 0.11179401725530624,
"signal/advantage_std": 0.11179401725530624,
"signal/brier_reward/centered_abs_mean": 0.1640268325805664,
"signal/brier_reward/group_std_mean": 0.2074387788772583,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0205033540725708,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0205033540725708,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.055974191427230834,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07065004408359528,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006996773928403854,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006996773928403854,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.009295025281608105,
"signal/frontier_aurc_reward/group_std_mean": 0.015336821600794793,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00016638094675727188,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00016638094675727188,
"signal/frontier_coverage_1/centered_abs_mean": 0.2718194603919983,
"signal/frontier_coverage_1/group_std_mean": 0.3359232068061829,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004865568224340677,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004865568224340677,
"signal/frontier_coverage_10/centered_abs_mean": 0.2718194603919983,
"signal/frontier_coverage_10/group_std_mean": 0.3359232068061829,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004865568224340677,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004865568224340677,
"signal/frontier_coverage_15/centered_abs_mean": 0.2718194603919983,
"signal/frontier_coverage_15/group_std_mean": 0.3359232068061829,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004865568224340677,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004865568224340677,
"signal/frontier_coverage_20/centered_abs_mean": 0.25732233226299284,
"signal/frontier_coverage_20/group_std_mean": 0.3185389518737793,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004606069531291723,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004606069531291723,
"signal/frontier_coverage_25/centered_abs_mean": 0.21424658000469207,
"signal/frontier_coverage_25/group_std_mean": 0.265936878323555,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003835013695061207,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003835013695061207,
"signal/frontier_coverage_5/centered_abs_mean": 0.2718194603919983,
"signal/frontier_coverage_5/group_std_mean": 0.3359232068061829,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004865568224340677,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004865568224340677,
"signal/frontier_ece_reward/centered_abs_mean": 0.02115457020699978,
"signal/frontier_ece_reward/group_std_mean": 0.027090443298220634,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0026443212758749724,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0026443212758749724,
"step": 55
},
{
"calibration/aurc": 0.34666018842884555,
"calibration/batch_distribution_entropy": 0.9539929411938755,
"calibration/buffer_distribution_entropy": 0.9131626800835845,
"calibration/confidence_entropy": 0.5139800747204213,
"calibration/coverage@0%": 0.010546875,
"calibration/coverage@1%": 0.010546875,
"calibration/coverage@10%": 0.035546875,
"calibration/coverage@15%": 0.089453125,
"calibration/coverage@20%": 0.13984375,
"calibration/coverage@25%": 0.207421875,
"calibration/coverage@30%": 0.356640625,
"calibration/coverage@5%": 0.010546875,
"calibration/ece": 0.1272009158431427,
"calibration/mean_confidence": 0.40132151066502936,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 429.6,
"completions/max_terminated_length": 429.6,
"completions/mean_length": 155.38896484375,
"completions/mean_terminated_length": 155.41973571777345,
"completions/min_length": 56.0,
"completions/min_terminated_length": 68.6,
"epoch": 0.192,
"grad_norm": 0.0012557959416881204,
"learning_rate": 1e-06,
"loss": -0.0004,
"num_tokens": 198022145.0,
"reward": 0.9583419919013977,
"reward_std": 0.09908241480588913,
"rewards/accuracy_reward": 0.47001953125,
"rewards/brier_reward": 0.7584280371665955,
"rewards/confidence_uniqueness_reward": 0.9222665786743164,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.006594687420874834,
"rewards/frontier_coverage_1": 0.12110985442996025,
"rewards/frontier_coverage_10": 0.12110985442996025,
"rewards/frontier_coverage_15": 0.1202224388718605,
"rewards/frontier_coverage_20": 0.11801767498254775,
"rewards/frontier_coverage_25": 0.10794311836361885,
"rewards/frontier_coverage_5": 0.12110985442996025,
"rewards/frontier_ece_reward": 0.0068677449598908424,
"signal/accuracy_reward/centered_abs_mean": 0.143133544921875,
"signal/accuracy_reward/group_std_mean": 0.1828240841627121,
"signal/accuracy_reward/group_zero_std_frac": 0.5,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0715667724609375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0715667724609375,
"signal/advantage_abs_mean": 0.07796682193875312,
"signal/advantage_pre_scale_abs_mean": 0.07796682193875312,
"signal/advantage_pre_scale_std": 0.11373708546161651,
"signal/advantage_std": 0.11373708546161651,
"signal/brier_reward/centered_abs_mean": 0.1666719675064087,
"signal/brier_reward/group_std_mean": 0.20975261926651,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020833995938301087,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020833995938301087,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04403134733438492,
"signal/confidence_uniqueness_reward/group_std_mean": 0.053325545787811277,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005503918416798115,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005503918416798115,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.008081774506717921,
"signal/frontier_aurc_reward/group_std_mean": 0.01262767445296049,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00014466376742348075,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00014466376742348075,
"signal/frontier_coverage_1/centered_abs_mean": 0.26113094091415406,
"signal/frontier_coverage_1/group_std_mean": 0.3249383449554443,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004674243833869695,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004674243833869695,
"signal/frontier_coverage_10/centered_abs_mean": 0.26113094091415406,
"signal/frontier_coverage_10/group_std_mean": 0.3249383449554443,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004674243833869695,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004674243833869695,
"signal/frontier_coverage_15/centered_abs_mean": 0.2587172448635101,
"signal/frontier_coverage_15/group_std_mean": 0.3219481885433197,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0046310387551784515,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0046310387551784515,
"signal/frontier_coverage_20/centered_abs_mean": 0.2515134871006012,
"signal/frontier_coverage_20/group_std_mean": 0.31304178237915037,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004502091184258461,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004502091184258461,
"signal/frontier_coverage_25/centered_abs_mean": 0.22166378796100616,
"signal/frontier_coverage_25/group_std_mean": 0.27650326192379,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003967781597748399,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003967781597748399,
"signal/frontier_coverage_5/centered_abs_mean": 0.26113094091415406,
"signal/frontier_coverage_5/group_std_mean": 0.3249383449554443,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004674243833869695,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004674243833869695,
"signal/frontier_ece_reward/centered_abs_mean": 0.01634953673928976,
"signal/frontier_ece_reward/group_std_mean": 0.02110195942223072,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00204369209241122,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00204369209241122,
"step": 60
},
{
"calibration/aurc": 0.3068922258740809,
"calibration/batch_distribution_entropy": 0.9836930371808007,
"calibration/buffer_distribution_entropy": 0.9652891609187755,
"calibration/confidence_entropy": 0.49658896491924426,
"calibration/coverage@0%": 0.004296875,
"calibration/coverage@1%": 0.004296875,
"calibration/coverage@10%": 0.11328125,
"calibration/coverage@15%": 0.18359375,
"calibration/coverage@20%": 0.2453125,
"calibration/coverage@25%": 0.462890625,
"calibration/coverage@30%": 0.5765625,
"calibration/coverage@5%": 0.021484375,
"calibration/ece": 0.17242096229486084,
"calibration/mean_confidence": 0.4601654658599047,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 401.2,
"completions/max_terminated_length": 401.2,
"completions/mean_length": 161.915625,
"completions/mean_terminated_length": 161.97821655273438,
"completions/min_length": 28.6,
"completions/min_terminated_length": 71.2,
"epoch": 0.208,
"grad_norm": 0.0012177525786682963,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 214712385.0,
"reward": 0.9798494100570678,
"reward_std": 0.10657155811786652,
"rewards/accuracy_reward": 0.518359375,
"rewards/brier_reward": 0.7560826063156127,
"rewards/confidence_uniqueness_reward": 0.938948118686676,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.006415044050663709,
"rewards/frontier_coverage_1": 0.08197853565216065,
"rewards/frontier_coverage_10": 0.08135704249143601,
"rewards/frontier_coverage_15": 0.08022502958774566,
"rewards/frontier_coverage_20": 0.07670193687081336,
"rewards/frontier_coverage_25": 0.06808920502662659,
"rewards/frontier_coverage_5": 0.08197853565216065,
"rewards/frontier_ece_reward": 0.005847309483215213,
"signal/accuracy_reward/centered_abs_mean": 0.14227294921875,
"signal/accuracy_reward/group_std_mean": 0.18765878975391387,
"signal/accuracy_reward/group_zero_std_frac": 0.465625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.071136474609375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.071136474609375,
"signal/advantage_abs_mean": 0.08238936513662339,
"signal/advantage_pre_scale_abs_mean": 0.08238936513662339,
"signal/advantage_pre_scale_std": 0.12257505804300309,
"signal/advantage_std": 0.12257505804300309,
"signal/brier_reward/centered_abs_mean": 0.1727546989917755,
"signal/brier_reward/group_std_mean": 0.2174914598464966,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02159433737397194,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02159433737397194,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03329867124557495,
"signal/confidence_uniqueness_reward/group_std_mean": 0.041111374646425246,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004162333905696869,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004162333905696869,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.007987076882272959,
"signal/frontier_aurc_reward/group_std_mean": 0.013466766290366649,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00014296866720542312,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00014296866720542312,
"signal/frontier_coverage_1/centered_abs_mean": 0.24193367958068848,
"signal/frontier_coverage_1/group_std_mean": 0.3065946936607361,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004330612625926733,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004330612625926733,
"signal/frontier_coverage_10/centered_abs_mean": 0.23778684437274933,
"signal/frontier_coverage_10/group_std_mean": 0.30148497223854065,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0042563843540847305,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0042563843540847305,
"signal/frontier_coverage_15/centered_abs_mean": 0.22926851511001586,
"signal/frontier_coverage_15/group_std_mean": 0.2908865034580231,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00410390617325902,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00410390617325902,
"signal/frontier_coverage_20/centered_abs_mean": 0.21191688477993012,
"signal/frontier_coverage_20/group_std_mean": 0.2694344460964203,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037933120504021643,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037933120504021643,
"signal/frontier_coverage_25/centered_abs_mean": 0.1667758345603943,
"signal/frontier_coverage_25/group_std_mean": 0.21348237097263337,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002985287271440029,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002985287271440029,
"signal/frontier_coverage_5/centered_abs_mean": 0.24193367958068848,
"signal/frontier_coverage_5/group_std_mean": 0.3065946936607361,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004330612625926733,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004330612625926733,
"signal/frontier_ece_reward/centered_abs_mean": 0.01677522622048855,
"signal/frontier_ece_reward/group_std_mean": 0.021132436394691468,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020969032775610685,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020969032775610685,
"step": 65
},
{
"calibration/aurc": 0.3200097212136897,
"calibration/batch_distribution_entropy": 0.9875934919652977,
"calibration/buffer_distribution_entropy": 0.9879163461611056,
"calibration/confidence_entropy": 0.4731915984410954,
"calibration/coverage@0%": 0.00782015931372549,
"calibration/coverage@1%": 0.00782015931372549,
"calibration/coverage@10%": 0.12666360294117646,
"calibration/coverage@15%": 0.19935355392156864,
"calibration/coverage@20%": 0.30967371323529413,
"calibration/coverage@25%": 0.40666053921568623,
"calibration/coverage@30%": 0.5044500612745099,
"calibration/coverage@5%": 0.00782015931372549,
"calibration/ece": 0.1488555930462697,
"calibration/mean_confidence": 0.45943806726296454,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 569.8,
"completions/max_terminated_length": 569.8,
"completions/mean_length": 163.84892578125,
"completions/mean_terminated_length": 163.927783203125,
"completions/min_length": 31.0,
"completions/min_terminated_length": 71.0,
"epoch": 0.224,
"grad_norm": 0.0011713270796462893,
"learning_rate": 1e-06,
"loss": -0.0005,
"num_tokens": 231543382.0,
"reward": 0.9619293451309204,
"reward_std": 0.10324173122644424,
"rewards/accuracy_reward": 0.468359375,
"rewards/brier_reward": 0.769196379184723,
"rewards/confidence_uniqueness_reward": 0.9441241979598999,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.00692370580509305,
"rewards/frontier_coverage_1": 0.132365383207798,
"rewards/frontier_coverage_10": 0.13184804767370223,
"rewards/frontier_coverage_15": 0.12701425105333328,
"rewards/frontier_coverage_20": 0.11204912811517716,
"rewards/frontier_coverage_25": 0.0956237182021141,
"rewards/frontier_coverage_5": 0.132365383207798,
"rewards/frontier_ece_reward": 0.007294747978448868,
"signal/accuracy_reward/centered_abs_mean": 0.12672119140625,
"signal/accuracy_reward/group_std_mean": 0.17051705121994018,
"signal/accuracy_reward/group_zero_std_frac": 0.503125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.063360595703125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.063360595703125,
"signal/advantage_abs_mean": 0.07872404158115387,
"signal/advantage_pre_scale_abs_mean": 0.07872404158115387,
"signal/advantage_pre_scale_std": 0.12147980481386185,
"signal/advantage_std": 0.12147980481386185,
"signal/brier_reward/centered_abs_mean": 0.1724269151687622,
"signal/brier_reward/group_std_mean": 0.21900182068347931,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021553364396095277,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021553364396095277,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03195973262190819,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03951691687107086,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003994966577738524,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003994966577738524,
"signal/format_reward/centered_abs_mean": 0.001123046875,
"signal/format_reward/group_std_mean": 0.0029782545287162067,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005615234375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0005615234375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.007875559106469154,
"signal/frontier_aurc_reward/group_std_mean": 0.013450121134519577,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00014097250532358886,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00014097250532358886,
"signal/frontier_coverage_1/centered_abs_mean": 0.22645123302936554,
"signal/frontier_coverage_1/group_std_mean": 0.2888976693153381,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004053476825356483,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004053476825356483,
"signal/frontier_coverage_10/centered_abs_mean": 0.22427822649478912,
"signal/frontier_coverage_10/group_std_mean": 0.28618112206459045,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004014580138027668,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004014580138027668,
"signal/frontier_coverage_15/centered_abs_mean": 0.21347157061100006,
"signal/frontier_coverage_15/group_std_mean": 0.27293606400489806,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038211409002542494,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038211409002542494,
"signal/frontier_coverage_20/centered_abs_mean": 0.1810237020254135,
"signal/frontier_coverage_20/group_std_mean": 0.2326923817396164,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003240324091166258,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003240324091166258,
"signal/frontier_coverage_25/centered_abs_mean": 0.1470854938030243,
"signal/frontier_coverage_25/group_std_mean": 0.18977726697921754,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002632830198854208,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002632830198854208,
"signal/frontier_coverage_5/centered_abs_mean": 0.22645123302936554,
"signal/frontier_coverage_5/group_std_mean": 0.2888976693153381,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004053476825356483,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004053476825356483,
"signal/frontier_ece_reward/centered_abs_mean": 0.01509154960513115,
"signal/frontier_ece_reward/group_std_mean": 0.019070441275835036,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018864437006413937,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018864437006413937,
"step": 70
},
{
"calibration/aurc": 0.39045969632858435,
"calibration/batch_distribution_entropy": 0.968501687803643,
"calibration/buffer_distribution_entropy": 0.9940884982492102,
"calibration/confidence_entropy": 0.46869571466245025,
"calibration/coverage@0%": 0.00546875,
"calibration/coverage@1%": 0.00546875,
"calibration/coverage@10%": 0.049609375,
"calibration/coverage@15%": 0.10625,
"calibration/coverage@20%": 0.16484375,
"calibration/coverage@25%": 0.226171875,
"calibration/coverage@30%": 0.26953125,
"calibration/coverage@5%": 0.011328125,
"calibration/ece": 0.17668582391545573,
"calibration/mean_confidence": 0.5169925887967606,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 440.4,
"completions/max_terminated_length": 440.4,
"completions/mean_length": 167.46328125,
"completions/mean_terminated_length": 167.46328125,
"completions/min_length": 70.2,
"completions/min_terminated_length": 70.2,
"epoch": 0.24,
"grad_norm": 0.0012115106219425797,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 248509886.0,
"reward": 0.9863848090171814,
"reward_std": 0.11233285069465637,
"rewards/accuracy_reward": 0.52998046875,
"rewards/brier_reward": 0.7531078338623047,
"rewards/confidence_uniqueness_reward": 0.9496447205543518,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.006484896969050169,
"rewards/frontier_coverage_1": 0.07747026761062444,
"rewards/frontier_coverage_10": 0.07747026761062444,
"rewards/frontier_coverage_15": 0.07708788146264851,
"rewards/frontier_coverage_20": 0.07453140085563063,
"rewards/frontier_coverage_25": 0.06235100794583559,
"rewards/frontier_coverage_5": 0.07747026761062444,
"rewards/frontier_ece_reward": 0.006582822371274233,
"signal/accuracy_reward/centered_abs_mean": 0.145330810546875,
"signal/accuracy_reward/group_std_mean": 0.19247545003890992,
"signal/accuracy_reward/group_zero_std_frac": 0.446875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0726654052734375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0726654052734375,
"signal/advantage_abs_mean": 0.08759264796972274,
"signal/advantage_pre_scale_abs_mean": 0.08759264796972274,
"signal/advantage_pre_scale_std": 0.13190509229898453,
"signal/advantage_std": 0.13190509229898453,
"signal/brier_reward/centered_abs_mean": 0.1856304883956909,
"signal/brier_reward/group_std_mean": 0.231916943192482,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023203811049461363,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.023203811049461363,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029887811467051505,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03774899542331696,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003735976433381438,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003735976433381438,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00786367803812027,
"signal/frontier_aurc_reward/group_std_mean": 0.013387826085090638,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0001407598319929093,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0001407598319929093,
"signal/frontier_coverage_1/centered_abs_mean": 0.21928218007087708,
"signal/frontier_coverage_1/group_std_mean": 0.28514992594718935,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0039251509588211775,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0039251509588211775,
"signal/frontier_coverage_10/centered_abs_mean": 0.21928218007087708,
"signal/frontier_coverage_10/group_std_mean": 0.28514992594718935,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0039251509588211775,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0039251509588211775,
"signal/frontier_coverage_15/centered_abs_mean": 0.21844083666801453,
"signal/frontier_coverage_15/group_std_mean": 0.2840579092502594,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003910091007128358,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003910091007128358,
"signal/frontier_coverage_20/centered_abs_mean": 0.20831224620342254,
"signal/frontier_coverage_20/group_std_mean": 0.2711889445781708,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037287891376763583,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037287891376763583,
"signal/frontier_coverage_25/centered_abs_mean": 0.16096322238445282,
"signal/frontier_coverage_25/group_std_mean": 0.21174894273281097,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028812415432184933,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028812415432184933,
"signal/frontier_coverage_5/centered_abs_mean": 0.21928218007087708,
"signal/frontier_coverage_5/group_std_mean": 0.28514992594718935,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0039251509588211775,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0039251509588211775,
"signal/frontier_ece_reward/centered_abs_mean": 0.015397474728524685,
"signal/frontier_ece_reward/group_std_mean": 0.019266339763998986,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019246843410655856,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019246843410655856,
"step": 75
},
{
"calibration/aurc": 0.28859004368151264,
"calibration/batch_distribution_entropy": 0.9515232366086724,
"calibration/buffer_distribution_entropy": 0.9886716858980806,
"calibration/confidence_entropy": 0.42647046227330876,
"calibration/coverage@0%": 0.014481409001956946,
"calibration/coverage@1%": 0.014481409001956946,
"calibration/coverage@10%": 0.20025914261252448,
"calibration/coverage@15%": 0.29257353840508804,
"calibration/coverage@20%": 0.37001437133072407,
"calibration/coverage@25%": 0.4838628302348337,
"calibration/coverage@30%": 0.5961564334637964,
"calibration/coverage@5%": 0.014481409001956946,
"calibration/ece": 0.12243031484871736,
"calibration/mean_confidence": 0.5223934809895115,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00166015625,
"completions/max_length": 739.0,
"completions/max_terminated_length": 739.0,
"completions/mean_length": 163.51669921875,
"completions/mean_terminated_length": 163.7890838623047,
"completions/min_length": 15.0,
"completions/min_terminated_length": 74.8,
"epoch": 0.256,
"grad_norm": 0.0021467828191816807,
"learning_rate": 1e-06,
"loss": -0.0012,
"num_tokens": 265239113.0,
"reward": 0.9797361373901368,
"reward_std": 0.10891171395778657,
"rewards/accuracy_reward": 0.505078125,
"rewards/brier_reward": 0.7687426209449768,
"rewards/confidence_uniqueness_reward": 0.9485057711601257,
"rewards/format_reward": 0.99814453125,
"rewards/frontier_aurc_reward": -0.0058880715630948545,
"rewards/frontier_coverage_1": 0.12136863321065902,
"rewards/frontier_coverage_10": 0.12136863321065902,
"rewards/frontier_coverage_15": 0.12136863321065902,
"rewards/frontier_coverage_20": 0.11860869899392128,
"rewards/frontier_coverage_25": 0.09261532202363014,
"rewards/frontier_coverage_5": 0.12136863321065902,
"rewards/frontier_ece_reward": 0.008826205506920815,
"signal/accuracy_reward/centered_abs_mean": 0.1375732421875,
"signal/accuracy_reward/group_std_mean": 0.17660146951675415,
"signal/accuracy_reward/group_zero_std_frac": 0.5125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06878662109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06878662109375,
"signal/advantage_abs_mean": 0.08314161151647567,
"signal/advantage_pre_scale_abs_mean": 0.08314161151647567,
"signal/advantage_pre_scale_std": 0.13095255494117736,
"signal/advantage_std": 0.13095255494117736,
"signal/brier_reward/centered_abs_mean": 0.17748952209949492,
"signal/brier_reward/group_std_mean": 0.2243036150932312,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022186190262436865,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.022186190262436865,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031655368953943254,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04402093142271042,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003956921119242907,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003956921119242907,
"signal/format_reward/centered_abs_mean": 0.003570556640625,
"signal/format_reward/group_std_mean": 0.009823499154299498,
"signal/format_reward/group_zero_std_frac": 0.946875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0017852783203125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0017852783203125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.006781843490898609,
"signal/frontier_aurc_reward/group_std_mean": 0.0116763386875391,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00012139499158365652,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00012139499158365652,
"signal/frontier_coverage_1/centered_abs_mean": 0.2191675126552582,
"signal/frontier_coverage_1/group_std_mean": 0.2833190619945526,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0039230983704328535,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0039230983704328535,
"signal/frontier_coverage_10/centered_abs_mean": 0.2191675126552582,
"signal/frontier_coverage_10/group_std_mean": 0.2833190619945526,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0039230983704328535,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0039230983704328535,
"signal/frontier_coverage_15/centered_abs_mean": 0.2191675126552582,
"signal/frontier_coverage_15/group_std_mean": 0.2833190619945526,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0039230983704328535,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0039230983704328535,
"signal/frontier_coverage_20/centered_abs_mean": 0.21456872820854186,
"signal/frontier_coverage_20/group_std_mean": 0.27757467031478883,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038407801184803247,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038407801184803247,
"signal/frontier_coverage_25/centered_abs_mean": 0.1644558608531952,
"signal/frontier_coverage_25/group_std_mean": 0.2139045476913452,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00294375978410244,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00294375978410244,
"signal/frontier_coverage_5/centered_abs_mean": 0.2191675126552582,
"signal/frontier_coverage_5/group_std_mean": 0.2833190619945526,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0039230983704328535,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0039230983704328535,
"signal/frontier_ece_reward/centered_abs_mean": 0.0159053985029459,
"signal/frontier_ece_reward/group_std_mean": 0.019815226271748542,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019881748128682377,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019881748128682377,
"step": 80
},
{
"calibration/aurc": 0.3690741624394786,
"calibration/batch_distribution_entropy": 0.9778070118347933,
"calibration/buffer_distribution_entropy": 0.9777087376819719,
"calibration/confidence_entropy": 0.4561837914814368,
"calibration/coverage@0%": 0.0074302837573385514,
"calibration/coverage@1%": 0.0074302837573385514,
"calibration/coverage@10%": 0.07005259295499021,
"calibration/coverage@15%": 0.11975905088062622,
"calibration/coverage@20%": 0.21756283635029355,
"calibration/coverage@25%": 0.29500902030332676,
"calibration/coverage@30%": 0.36660270914872795,
"calibration/coverage@5%": 0.0168236301369863,
"calibration/ece": 0.14922172124037716,
"calibration/mean_confidence": 0.4930572188321797,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 549.8,
"completions/max_terminated_length": 549.8,
"completions/mean_length": 170.63310546875,
"completions/mean_terminated_length": 170.69947509765626,
"completions/min_length": 12.2,
"completions/min_terminated_length": 76.6,
"epoch": 0.272,
"grad_norm": 0.00112878845538944,
"learning_rate": 1e-06,
"loss": -0.0003,
"num_tokens": 281952092.0,
"reward": 0.9767104148864746,
"reward_std": 0.10500547885894776,
"rewards/accuracy_reward": 0.4994140625,
"rewards/brier_reward": 0.7602247357368469,
"rewards/confidence_uniqueness_reward": 0.9546234965324402,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.005274599511176347,
"rewards/frontier_coverage_1": 0.11853411160409451,
"rewards/frontier_coverage_10": 0.11824645064771175,
"rewards/frontier_coverage_15": 0.11713217757642269,
"rewards/frontier_coverage_20": 0.11143716983497143,
"rewards/frontier_coverage_25": 0.0879902683198452,
"rewards/frontier_coverage_5": 0.11853411160409451,
"rewards/frontier_ece_reward": 0.008065588586032391,
"signal/accuracy_reward/centered_abs_mean": 0.1313232421875,
"signal/accuracy_reward/group_std_mean": 0.17296063303947448,
"signal/accuracy_reward/group_zero_std_frac": 0.50625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06566162109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06566162109375,
"signal/advantage_abs_mean": 0.0808216392993927,
"signal/advantage_pre_scale_abs_mean": 0.0808216392993927,
"signal/advantage_pre_scale_std": 0.12360534369945526,
"signal/advantage_std": 0.12360534369945526,
"signal/brier_reward/centered_abs_mean": 0.18187743723392485,
"signal/brier_reward/group_std_mean": 0.2294466108083725,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022734679654240607,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.022734679654240607,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02392299771308899,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03248454742133618,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029903747141361236,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029903747141361236,
"signal/format_reward/centered_abs_mean": 0.00113525390625,
"signal/format_reward/group_std_mean": 0.0033145629800856113,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.005552649311721325,
"signal/frontier_aurc_reward/group_std_mean": 0.009525079652667046,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.939241717802361e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.939241717802361e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2317501813173294,
"signal/frontier_coverage_1/group_std_mean": 0.29637727737426756,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004148328118026257,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004148328118026257,
"signal/frontier_coverage_10/centered_abs_mean": 0.23152775168418885,
"signal/frontier_coverage_10/group_std_mean": 0.2960933208465576,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004144346620887518,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004144346620887518,
"signal/frontier_coverage_15/centered_abs_mean": 0.23041679263114928,
"signal/frontier_coverage_15/group_std_mean": 0.29468045234680174,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004124460462480784,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004124460462480784,
"signal/frontier_coverage_20/centered_abs_mean": 0.21580785512924194,
"signal/frontier_coverage_20/group_std_mean": 0.2764699876308441,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038629604037851094,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038629604037851094,
"signal/frontier_coverage_25/centered_abs_mean": 0.1677774280309677,
"signal/frontier_coverage_25/group_std_mean": 0.21616117656230927,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003003215929493308,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003003215929493308,
"signal/frontier_coverage_5/centered_abs_mean": 0.2317501813173294,
"signal/frontier_coverage_5/group_std_mean": 0.29637727737426756,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004148328118026257,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004148328118026257,
"signal/frontier_ece_reward/centered_abs_mean": 0.015623759292066098,
"signal/frontier_ece_reward/group_std_mean": 0.019285344704985618,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019529699115082622,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019529699115082622,
"step": 85
},
{
"calibration/aurc": 0.3304410129425927,
"calibration/batch_distribution_entropy": 0.9744050038981842,
"calibration/buffer_distribution_entropy": 0.9809351056902752,
"calibration/confidence_entropy": 0.45289924250102276,
"calibration/coverage@0%": 0.003515625,
"calibration/coverage@1%": 0.003515625,
"calibration/coverage@10%": 0.055078125,
"calibration/coverage@15%": 0.116796875,
"calibration/coverage@20%": 0.180859375,
"calibration/coverage@25%": 0.26484375,
"calibration/coverage@30%": 0.372265625,
"calibration/coverage@5%": 0.025,
"calibration/ece": 0.12180083061825882,
"calibration/mean_confidence": 0.5229153803497594,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 414.4,
"completions/max_terminated_length": 414.4,
"completions/mean_length": 166.2126953125,
"completions/mean_terminated_length": 166.24555053710938,
"completions/min_length": 46.8,
"completions/min_terminated_length": 77.8,
"epoch": 0.288,
"grad_norm": 0.0012148990062996745,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 298612286.0,
"reward": 0.9814790964126587,
"reward_std": 0.1037442296743393,
"rewards/accuracy_reward": 0.50478515625,
"rewards/brier_reward": 0.7664803266525269,
"rewards/confidence_uniqueness_reward": 0.9558304190635681,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.005551142990589142,
"rewards/frontier_coverage_1": 0.12445419132709504,
"rewards/frontier_coverage_10": 0.12445419132709504,
"rewards/frontier_coverage_15": 0.12445419132709504,
"rewards/frontier_coverage_20": 0.1208130031824112,
"rewards/frontier_coverage_25": 0.10450499802827835,
"rewards/frontier_coverage_5": 0.12445419132709504,
"rewards/frontier_ece_reward": 0.00840447163209319,
"signal/accuracy_reward/centered_abs_mean": 0.137322998046875,
"signal/accuracy_reward/group_std_mean": 0.18192999064922333,
"signal/accuracy_reward/group_zero_std_frac": 0.478125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0686614990234375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0686614990234375,
"signal/advantage_abs_mean": 0.07975934892892837,
"signal/advantage_pre_scale_abs_mean": 0.07975934892892837,
"signal/advantage_pre_scale_std": 0.12279856950044632,
"signal/advantage_std": 0.12279856950044632,
"signal/brier_reward/centered_abs_mean": 0.17929280996322633,
"signal/brier_reward/group_std_mean": 0.22796185612678527,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02241160124540329,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02241160124540329,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022686892375349998,
"signal/confidence_uniqueness_reward/group_std_mean": 0.029962728172540663,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028358615469187497,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028358615469187497,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.005954787414520979,
"signal/frontier_aurc_reward/group_std_mean": 0.010394319705665112,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00010659069230314344,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00010659069230314344,
"signal/frontier_coverage_1/centered_abs_mean": 0.2316443383693695,
"signal/frontier_coverage_1/group_std_mean": 0.29880672693252563,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004146433435380459,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004146433435380459,
"signal/frontier_coverage_10/centered_abs_mean": 0.2316443383693695,
"signal/frontier_coverage_10/group_std_mean": 0.29880672693252563,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004146433435380459,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004146433435380459,
"signal/frontier_coverage_15/centered_abs_mean": 0.2316443383693695,
"signal/frontier_coverage_15/group_std_mean": 0.29880672693252563,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004146433435380459,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004146433435380459,
"signal/frontier_coverage_20/centered_abs_mean": 0.22515642046928405,
"signal/frontier_coverage_20/group_std_mean": 0.29070602655410765,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0040302996058017015,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0040302996058017015,
"signal/frontier_coverage_25/centered_abs_mean": 0.18494615852832794,
"signal/frontier_coverage_25/group_std_mean": 0.24011895954608917,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0033105363138020037,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033105363138020037,
"signal/frontier_coverage_5/centered_abs_mean": 0.2316443383693695,
"signal/frontier_coverage_5/group_std_mean": 0.29880672693252563,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004146433435380459,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004146433435380459,
"signal/frontier_ece_reward/centered_abs_mean": 0.01566954664885998,
"signal/frontier_ece_reward/group_std_mean": 0.01951332613825798,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019586933311074974,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019586933311074974,
"step": 90
},
{
"calibration/aurc": 0.2928327730684395,
"calibration/batch_distribution_entropy": 0.9615355467395797,
"calibration/buffer_distribution_entropy": 0.9837028085518332,
"calibration/confidence_entropy": 0.4414839631441798,
"calibration/coverage@0%": 0.009765625,
"calibration/coverage@1%": 0.009765625,
"calibration/coverage@10%": 0.091796875,
"calibration/coverage@15%": 0.172265625,
"calibration/coverage@20%": 0.248046875,
"calibration/coverage@25%": 0.38515625,
"calibration/coverage@30%": 0.521875,
"calibration/coverage@5%": 0.01953125,
"calibration/ece": 0.12001761637334837,
"calibration/mean_confidence": 0.5420491853075061,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 530.0,
"completions/max_terminated_length": 530.0,
"completions/mean_length": 170.45859375,
"completions/mean_terminated_length": 170.49200744628905,
"completions/min_length": 45.2,
"completions/min_terminated_length": 78.2,
"epoch": 0.304,
"grad_norm": 0.0012685375986620784,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 315287734.0,
"reward": 0.979833447933197,
"reward_std": 0.10198417156934739,
"rewards/accuracy_reward": 0.50478515625,
"rewards/brier_reward": 0.7598029494285583,
"rewards/confidence_uniqueness_reward": 0.9540252685546875,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.006118230614811182,
"rewards/frontier_coverage_1": 0.12017403692007064,
"rewards/frontier_coverage_10": 0.12017403692007064,
"rewards/frontier_coverage_15": 0.12012646496295928,
"rewards/frontier_coverage_20": 0.1176812157034874,
"rewards/frontier_coverage_25": 0.10150278061628341,
"rewards/frontier_coverage_5": 0.12017403692007064,
"rewards/frontier_ece_reward": 0.007921409513801336,
"signal/accuracy_reward/centered_abs_mean": 0.132562255859375,
"signal/accuracy_reward/group_std_mean": 0.1767975628376007,
"signal/accuracy_reward/group_zero_std_frac": 0.490625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0662811279296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0662811279296875,
"signal/advantage_abs_mean": 0.07725491225719452,
"signal/advantage_pre_scale_abs_mean": 0.07725491225719452,
"signal/advantage_pre_scale_std": 0.11971888989210129,
"signal/advantage_std": 0.11971888989210129,
"signal/brier_reward/centered_abs_mean": 0.1780666172504425,
"signal/brier_reward/group_std_mean": 0.22535655200481414,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022258327156305314,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.022258327156305314,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023503893241286277,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03153311610221863,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029379866551607846,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029379866551607846,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.006390297319740057,
"signal/frontier_aurc_reward/group_std_mean": 0.011566732451319695,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00011438632354838773,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00011438632354838773,
"signal/frontier_coverage_1/centered_abs_mean": 0.23089086413383483,
"signal/frontier_coverage_1/group_std_mean": 0.2983818709850311,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004132946487516165,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004132946487516165,
"signal/frontier_coverage_10/centered_abs_mean": 0.23089086413383483,
"signal/frontier_coverage_10/group_std_mean": 0.2983818709850311,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004132946487516165,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004132946487516165,
"signal/frontier_coverage_15/centered_abs_mean": 0.2303963989019394,
"signal/frontier_coverage_15/group_std_mean": 0.2977550089359283,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0041240955702960495,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0041240955702960495,
"signal/frontier_coverage_20/centered_abs_mean": 0.22403717041015625,
"signal/frontier_coverage_20/group_std_mean": 0.28987476229667664,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00401026513427496,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00401026513427496,
"signal/frontier_coverage_25/centered_abs_mean": 0.18753055930137635,
"signal/frontier_coverage_25/group_std_mean": 0.24351601004600526,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003356796922162175,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003356796922162175,
"signal/frontier_coverage_5/centered_abs_mean": 0.23089086413383483,
"signal/frontier_coverage_5/group_std_mean": 0.2983818709850311,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004132946487516165,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004132946487516165,
"signal/frontier_ece_reward/centered_abs_mean": 0.016810842603445054,
"signal/frontier_ece_reward/group_std_mean": 0.021050842106342317,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021013553254306317,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021013553254306317,
"step": 95
},
{
"calibration/aurc": 0.25919335064503096,
"calibration/batch_distribution_entropy": 0.9588833435083319,
"calibration/buffer_distribution_entropy": 0.9745603158881533,
"calibration/confidence_entropy": 0.4357127864815533,
"calibration/coverage@0%": 0.00859375,
"calibration/coverage@1%": 0.00859375,
"calibration/coverage@10%": 0.217578125,
"calibration/coverage@15%": 0.3375,
"calibration/coverage@20%": 0.43671875,
"calibration/coverage@25%": 0.541015625,
"calibration/coverage@30%": 0.659375,
"calibration/coverage@5%": 0.04921875,
"calibration/ece": 0.14945910105641208,
"calibration/mean_confidence": 0.5585945923145902,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00068359375,
"completions/max_length": 572.0,
"completions/max_terminated_length": 572.0,
"completions/mean_length": 170.32451171875,
"completions/mean_terminated_length": 170.44094543457032,
"completions/min_length": 33.0,
"completions/min_terminated_length": 81.0,
"epoch": 0.32,
"grad_norm": 0.0009813595097512007,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 332120561.0,
"reward": 0.9926390767097473,
"reward_std": 0.09243862479925155,
"rewards/accuracy_reward": 0.52763671875,
"rewards/brier_reward": 0.7718438506126404,
"rewards/confidence_uniqueness_reward": 0.9550535559654236,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.005387743096798658,
"rewards/frontier_coverage_1": 0.11694640582427382,
"rewards/frontier_coverage_10": 0.11694640582427382,
"rewards/frontier_coverage_15": 0.11703521078452468,
"rewards/frontier_coverage_20": 0.11508952975273132,
"rewards/frontier_coverage_25": 0.1003711468540132,
"rewards/frontier_coverage_5": 0.11694640582427382,
"rewards/frontier_ece_reward": 0.00932073788717389,
"signal/accuracy_reward/centered_abs_mean": 0.101153564453125,
"signal/accuracy_reward/group_std_mean": 0.13886838555335998,
"signal/accuracy_reward/group_zero_std_frac": 0.578125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0505767822265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0505767822265625,
"signal/advantage_abs_mean": 0.06976389810442925,
"signal/advantage_pre_scale_abs_mean": 0.06976389810442925,
"signal/advantage_pre_scale_std": 0.11266756206750869,
"signal/advantage_std": 0.11266756206750869,
"signal/brier_reward/centered_abs_mean": 0.17124907970428466,
"signal/brier_reward/group_std_mean": 0.21771592497825623,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021406134963035582,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021406134963035582,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022157645598053933,
"signal/confidence_uniqueness_reward/group_std_mean": 0.029678992554545404,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027697056997567416,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027697056997567416,
"signal/format_reward/centered_abs_mean": 0.001287841796875,
"signal/format_reward/group_std_mean": 0.003135160403326154,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006439208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006439208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.005809414759278297,
"signal/frontier_aurc_reward/group_std_mean": 0.009797831438481808,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00010398852027719841,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00010398852027719841,
"signal/frontier_coverage_1/centered_abs_mean": 0.20570947229862213,
"signal/frontier_coverage_1/group_std_mean": 0.2661749660968781,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036821993533521892,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036821993533521892,
"signal/frontier_coverage_10/centered_abs_mean": 0.20570947229862213,
"signal/frontier_coverage_10/group_std_mean": 0.2661749660968781,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036821993533521892,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036821993533521892,
"signal/frontier_coverage_15/centered_abs_mean": 0.2047569900751114,
"signal/frontier_coverage_15/group_std_mean": 0.2649563252925873,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036651499569416044,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036651499569416044,
"signal/frontier_coverage_20/centered_abs_mean": 0.19807130694389344,
"signal/frontier_coverage_20/group_std_mean": 0.25659127831459044,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035454762168228627,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035454762168228627,
"signal/frontier_coverage_25/centered_abs_mean": 0.1582948684692383,
"signal/frontier_coverage_25/group_std_mean": 0.2060314029455185,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002833477919921279,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002833477919921279,
"signal/frontier_coverage_5/centered_abs_mean": 0.20570947229862213,
"signal/frontier_coverage_5/group_std_mean": 0.2661749660968781,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036821993533521892,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036821993533521892,
"signal/frontier_ece_reward/centered_abs_mean": 0.016902418434619905,
"signal/frontier_ece_reward/group_std_mean": 0.020892414450645446,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002112802304327488,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002112802304327488,
"step": 100
},
{
"epoch": 0.32,
"eval_calibration/aurc": 0.5026836588603916,
"eval_calibration/batch_distribution_entropy": 0.9227077340793142,
"eval_calibration/buffer_distribution_entropy": 0.9686803915817666,
"eval_calibration/confidence_entropy": 0.43340676534345873,
"eval_calibration/coverage@0%": 0.078125,
"eval_calibration/coverage@1%": 0.078125,
"eval_calibration/coverage@10%": 0.1015625,
"eval_calibration/coverage@15%": 0.1015625,
"eval_calibration/coverage@20%": 0.109375,
"eval_calibration/coverage@25%": 0.1328125,
"eval_calibration/coverage@30%": 0.140625,
"eval_calibration/coverage@5%": 0.078125,
"eval_calibration/ece": 0.22255453627881616,
"eval_calibration/mean_confidence": 0.45931554026700455,
"eval_completions/clipped_ratio": 0.001953125,
"eval_completions/max_length": 309.5,
"eval_completions/max_terminated_length": 309.5,
"eval_completions/mean_length": 173.49979782104492,
"eval_completions/mean_terminated_length": 173.83627319335938,
"eval_completions/min_length": 65.75,
"eval_completions/min_terminated_length": 86.25,
"eval_loss": 0.0,
"eval_num_tokens": 332120561.0,
"eval_reward": 0.9369220435619354,
"eval_reward_std": 0.22920089587569237,
"eval_rewards/accuracy_reward": 0.419921875,
"eval_rewards/brier_reward": 0.7654155492782593,
"eval_rewards/confidence_uniqueness_reward": 0.902989536523819,
"eval_rewards/format_reward": 0.998046875,
"eval_rewards/frontier_aurc_reward": -0.006293334416113794,
"eval_rewards/frontier_coverage_1": 0.18488197773694992,
"eval_rewards/frontier_coverage_10": 0.18488197773694992,
"eval_rewards/frontier_coverage_15": 0.1831054612994194,
"eval_rewards/frontier_coverage_20": 0.16677184775471687,
"eval_rewards/frontier_coverage_25": 0.11936107277870178,
"eval_rewards/frontier_coverage_5": 0.18488197773694992,
"eval_rewards/frontier_ece_reward": 0.009377233684062958,
"eval_runtime": 26.6031,
"eval_samples_per_second": 18.795,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4727783203125,
"eval_signal/accuracy_reward/group_std_mean": 0.49358750879764557,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23638916015625,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23638916015625,
"eval_signal/advantage_abs_mean": 0.20714639872312546,
"eval_signal/advantage_pre_scale_abs_mean": 0.20714639872312546,
"eval_signal/advantage_pre_scale_std": 0.22685326635837555,
"eval_signal/advantage_std": 0.22685326635837555,
"eval_signal/brier_reward/centered_abs_mean": 0.231270469725132,
"eval_signal/brier_reward/group_std_mean": 0.28562986105680466,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0289088087156415,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0289088087156415,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0414502527564764,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05526182893663645,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00518128159455955,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00518128159455955,
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.008229476981796324,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.014984771609306335,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0001473076335969381,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0001473076335969381,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.37741898000240326,
"eval_signal/frontier_coverage_1/group_std_mean": 0.47337739169597626,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006755799404345453,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006755799404345453,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.37741898000240326,
"eval_signal/frontier_coverage_10/group_std_mean": 0.47337739169597626,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006755799404345453,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006755799404345453,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3739687353372574,
"eval_signal/frontier_coverage_15/group_std_mean": 0.46921079605817795,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006694040144793689,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006694040144793689,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.34481101483106613,
"eval_signal/frontier_coverage_20/group_std_mean": 0.43423992395401,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006172116962261498,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006172116962261498,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.24870974197983742,
"eval_signal/frontier_coverage_25/group_std_mean": 0.3186473697423935,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0044519040966406465,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0044519040966406465,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.37741898000240326,
"eval_signal/frontier_coverage_5/group_std_mean": 0.47337739169597626,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006755799404345453,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006755799404345453,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.01966511318460107,
"eval_signal/frontier_ece_reward/group_std_mean": 0.02496191207319498,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0024581391480751336,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0024581391480751336,
"eval_steps_per_second": 0.15,
"step": 100
},
{
"calibration/aurc": 0.2921943535028766,
"calibration/batch_distribution_entropy": 0.9539542764449829,
"calibration/buffer_distribution_entropy": 0.9679044804508562,
"calibration/confidence_entropy": 0.4318321504453554,
"calibration/coverage@0%": 0.007827788649706457,
"calibration/coverage@1%": 0.007827788649706457,
"calibration/coverage@10%": 0.055890716731898236,
"calibration/coverage@15%": 0.15060344000613943,
"calibration/coverage@20%": 0.2335782994704731,
"calibration/coverage@25%": 0.39862115265626796,
"calibration/coverage@30%": 0.5299859764111124,
"calibration/coverage@5%": 0.010567514677103717,
"calibration/ece": 0.1309882683737706,
"calibration/mean_confidence": 0.5033657145265187,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00078125,
"completions/max_length": 472.6,
"completions/max_terminated_length": 472.6,
"completions/mean_length": 171.4955078125,
"completions/mean_terminated_length": 171.62926025390624,
"completions/min_length": 12.6,
"completions/min_terminated_length": 75.6,
"epoch": 0.336,
"grad_norm": 0.001385677489452064,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 348599107.0,
"reward": 0.9934534430503845,
"reward_std": 0.09883903712034225,
"rewards/accuracy_reward": 0.53154296875,
"rewards/brier_reward": 0.7714316129684449,
"rewards/confidence_uniqueness_reward": 0.9537386059761047,
"rewards/format_reward": 0.99892578125,
"rewards/frontier_aurc_reward": -0.005051617510616779,
"rewards/frontier_coverage_1": 0.11371075063943863,
"rewards/frontier_coverage_10": 0.11371075063943863,
"rewards/frontier_coverage_15": 0.11198057159781456,
"rewards/frontier_coverage_20": 0.10444353222846985,
"rewards/frontier_coverage_25": 0.09080478549003601,
"rewards/frontier_coverage_5": 0.11371075063943863,
"rewards/frontier_ece_reward": 0.008460283372551202,
"signal/accuracy_reward/centered_abs_mean": 0.116693115234375,
"signal/accuracy_reward/group_std_mean": 0.15743698477745055,
"signal/accuracy_reward/group_zero_std_frac": 0.5375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0583465576171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0583465576171875,
"signal/advantage_abs_mean": 0.07462372928857804,
"signal/advantage_pre_scale_abs_mean": 0.07462372928857804,
"signal/advantage_pre_scale_std": 0.11967791616916656,
"signal/advantage_std": 0.11967791616916656,
"signal/brier_reward/centered_abs_mean": 0.17411761581897736,
"signal/brier_reward/group_std_mean": 0.2207622081041336,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02176470197737217,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02176470197737217,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022577373310923576,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03221844956278801,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002822171663865447,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002822171663865447,
"signal/format_reward/centered_abs_mean": 0.002069091796875,
"signal/format_reward/group_std_mean": 0.005740390345454216,
"signal/format_reward/group_zero_std_frac": 0.96875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010345458984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0010345458984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0053861944004893305,
"signal/frontier_aurc_reward/group_std_mean": 0.009071863163262606,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.641287761041895e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.641287761041895e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.21498210728168488,
"signal/frontier_coverage_1/group_std_mean": 0.2763300836086273,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038481795694679023,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038481795694679023,
"signal/frontier_coverage_10/centered_abs_mean": 0.21498210728168488,
"signal/frontier_coverage_10/group_std_mean": 0.2763300836086273,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038481795694679023,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038481795694679023,
"signal/frontier_coverage_15/centered_abs_mean": 0.20946071445941924,
"signal/frontier_coverage_15/group_std_mean": 0.2693983554840088,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037493467330932616,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037493467330932616,
"signal/frontier_coverage_20/centered_abs_mean": 0.18959801495075226,
"signal/frontier_coverage_20/group_std_mean": 0.2442725867033005,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033938043285161256,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033938043285161256,
"signal/frontier_coverage_25/centered_abs_mean": 0.14696356356143953,
"signal/frontier_coverage_25/group_std_mean": 0.18960790634155272,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026306476909667255,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026306476909667255,
"signal/frontier_coverage_5/centered_abs_mean": 0.21498210728168488,
"signal/frontier_coverage_5/group_std_mean": 0.2763300836086273,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038481795694679023,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038481795694679023,
"signal/frontier_ece_reward/centered_abs_mean": 0.014189903996884823,
"signal/frontier_ece_reward/group_std_mean": 0.01779564656317234,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001773737999610603,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001773737999610603,
"step": 105
},
{
"calibration/aurc": 0.3465969957495763,
"calibration/batch_distribution_entropy": 0.9112800543706812,
"calibration/buffer_distribution_entropy": 0.9629320221706934,
"calibration/confidence_entropy": 0.39319961365768036,
"calibration/coverage@0%": 0.029739481409001955,
"calibration/coverage@1%": 0.029739481409001955,
"calibration/coverage@10%": 0.12248284973235868,
"calibration/coverage@15%": 0.20113317672959594,
"calibration/coverage@20%": 0.3303396208654695,
"calibration/coverage@25%": 0.4172635805178236,
"calibration/coverage@30%": 0.4775400442711331,
"calibration/coverage@5%": 0.05165728962818004,
"calibration/ece": 0.13955969644619798,
"calibration/mean_confidence": 0.447478855066922,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 526.2,
"completions/max_terminated_length": 526.2,
"completions/mean_length": 173.3138671875,
"completions/mean_terminated_length": 173.3481689453125,
"completions/min_length": 46.4,
"completions/min_terminated_length": 79.2,
"epoch": 0.352,
"grad_norm": 0.0013226274168118834,
"learning_rate": 1e-06,
"loss": -0.0,
"num_tokens": 365634257.0,
"reward": 0.9634492516517639,
"reward_std": 0.09292121678590774,
"rewards/accuracy_reward": 0.46220703125,
"rewards/brier_reward": 0.7718739628791809,
"rewards/confidence_uniqueness_reward": 0.9495089411735534,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.005700189620256424,
"rewards/frontier_coverage_1": 0.16635644137859346,
"rewards/frontier_coverage_10": 0.16635644137859346,
"rewards/frontier_coverage_15": 0.16308521777391433,
"rewards/frontier_coverage_20": 0.14273270815610886,
"rewards/frontier_coverage_25": 0.11742941588163376,
"rewards/frontier_coverage_5": 0.16635644137859346,
"rewards/frontier_ece_reward": 0.0076860770583152774,
"signal/accuracy_reward/centered_abs_mean": 0.112554931640625,
"signal/accuracy_reward/group_std_mean": 0.14770327508449554,
"signal/accuracy_reward/group_zero_std_frac": 0.578125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0562774658203125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0562774658203125,
"signal/advantage_abs_mean": 0.07131645083427429,
"signal/advantage_pre_scale_abs_mean": 0.07131645083427429,
"signal/advantage_pre_scale_std": 0.113882178068161,
"signal/advantage_std": 0.113882178068161,
"signal/brier_reward/centered_abs_mean": 0.16707732379436493,
"signal/brier_reward/group_std_mean": 0.2137216806411743,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020884665474295617,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020884665474295617,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0262396153062582,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03516591116786003,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003279951913282275,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003279951913282275,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086068242787,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.005570146534591913,
"signal/frontier_aurc_reward/group_std_mean": 0.009517465904355048,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.970561804948375e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.970561804948375e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.21669540405273438,
"signal/frontier_coverage_1/group_std_mean": 0.27859448790550234,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038788476493209602,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038788476493209602,
"signal/frontier_coverage_10/centered_abs_mean": 0.21669540405273438,
"signal/frontier_coverage_10/group_std_mean": 0.27859448790550234,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038788476493209602,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038788476493209602,
"signal/frontier_coverage_15/centered_abs_mean": 0.2109543651342392,
"signal/frontier_coverage_15/group_std_mean": 0.27131189703941344,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037760830018669367,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037760830018669367,
"signal/frontier_coverage_20/centered_abs_mean": 0.18779728710651397,
"signal/frontier_coverage_20/group_std_mean": 0.24160605072975158,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033615713473409414,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033615713473409414,
"signal/frontier_coverage_25/centered_abs_mean": 0.15097877085208894,
"signal/frontier_coverage_25/group_std_mean": 0.19353824257850646,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027025198098272083,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027025198098272083,
"signal/frontier_coverage_5/centered_abs_mean": 0.21669540405273438,
"signal/frontier_coverage_5/group_std_mean": 0.27859448790550234,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038788476493209602,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038788476493209602,
"signal/frontier_ece_reward/centered_abs_mean": 0.013547240383923053,
"signal/frontier_ece_reward/group_std_mean": 0.01688943840563297,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016934050479903817,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016934050479903817,
"step": 110
},
{
"calibration/aurc": 0.3685626350922462,
"calibration/batch_distribution_entropy": 0.945674845315077,
"calibration/buffer_distribution_entropy": 0.9501859486942553,
"calibration/confidence_entropy": 0.4218211612166747,
"calibration/coverage@0%": 0.004296875,
"calibration/coverage@1%": 0.004296875,
"calibration/coverage@10%": 0.044140625,
"calibration/coverage@15%": 0.11484375,
"calibration/coverage@20%": 0.183984375,
"calibration/coverage@25%": 0.307421875,
"calibration/coverage@30%": 0.38828125,
"calibration/coverage@5%": 0.004296875,
"calibration/ece": 0.13709173849716338,
"calibration/mean_confidence": 0.501956311672906,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 770.8,
"completions/max_terminated_length": 770.8,
"completions/mean_length": 177.635546875,
"completions/mean_terminated_length": 177.72190856933594,
"completions/min_length": 30.6,
"completions/min_terminated_length": 81.6,
"epoch": 0.368,
"grad_norm": 0.0012824723962694407,
"learning_rate": 1e-06,
"loss": -0.0004,
"num_tokens": 382518717.0,
"reward": 0.9759793758392334,
"reward_std": 0.08999019116163254,
"rewards/accuracy_reward": 0.4904296875,
"rewards/brier_reward": 0.7730782985687256,
"rewards/confidence_uniqueness_reward": 0.9514719486236572,
"rewards/format_reward": 0.99912109375,
"rewards/frontier_aurc_reward": -0.0056956646963953975,
"rewards/frontier_coverage_1": 0.14614581167697907,
"rewards/frontier_coverage_10": 0.14614581167697907,
"rewards/frontier_coverage_15": 0.14474955052137375,
"rewards/frontier_coverage_20": 0.13514772653579712,
"rewards/frontier_coverage_25": 0.10782853215932846,
"rewards/frontier_coverage_5": 0.14614581167697907,
"rewards/frontier_ece_reward": 0.007590759824961424,
"signal/accuracy_reward/centered_abs_mean": 0.10577392578125,
"signal/accuracy_reward/group_std_mean": 0.13968519866466522,
"signal/accuracy_reward/group_zero_std_frac": 0.596875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052886962890625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.052886962890625,
"signal/advantage_abs_mean": 0.06855102777481079,
"signal/advantage_pre_scale_abs_mean": 0.06855102777481079,
"signal/advantage_pre_scale_std": 0.11280774921178818,
"signal/advantage_std": 0.11280774921178818,
"signal/brier_reward/centered_abs_mean": 0.1598452240228653,
"signal/brier_reward/group_std_mean": 0.20456505715847015,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019980653002858163,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.019980653002858163,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024071278423070906,
"signal/confidence_uniqueness_reward/group_std_mean": 0.033364905044436455,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030089098028838633,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030089098028838633,
"signal/format_reward/centered_abs_mean": 0.001690673828125,
"signal/format_reward/group_std_mean": 0.004635536018759013,
"signal/format_reward/group_zero_std_frac": 0.975,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008453369140625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008453369140625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0053287448361516,
"signal/frontier_aurc_reward/group_std_mean": 0.00915834279730916,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.538452868582681e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.538452868582681e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.20414318442344664,
"signal/frontier_coverage_1/group_std_mean": 0.2633121430873871,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036541628651320934,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036541628651320934,
"signal/frontier_coverage_10/centered_abs_mean": 0.20414318442344664,
"signal/frontier_coverage_10/group_std_mean": 0.2633121430873871,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036541628651320934,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036541628651320934,
"signal/frontier_coverage_15/centered_abs_mean": 0.20212631225585936,
"signal/frontier_coverage_15/group_std_mean": 0.26074336767196654,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003618060797452927,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003618060797452927,
"signal/frontier_coverage_20/centered_abs_mean": 0.18931483924388887,
"signal/frontier_coverage_20/group_std_mean": 0.2444866716861725,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033887355588376523,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033887355588376523,
"signal/frontier_coverage_25/centered_abs_mean": 0.14778677523136138,
"signal/frontier_coverage_25/group_std_mean": 0.19104886054992676,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00264538312330842,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00264538312330842,
"signal/frontier_coverage_5/centered_abs_mean": 0.20414318442344664,
"signal/frontier_coverage_5/group_std_mean": 0.2633121430873871,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036541628651320934,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036541628651320934,
"signal/frontier_ece_reward/centered_abs_mean": 0.013357460685074329,
"signal/frontier_ece_reward/group_std_mean": 0.01701092943549156,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001669682585634291,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001669682585634291,
"step": 115
},
{
"calibration/aurc": 0.3279842686131645,
"calibration/batch_distribution_entropy": 0.9076316158648453,
"calibration/buffer_distribution_entropy": 0.9469329101312176,
"calibration/confidence_entropy": 0.38987365992040235,
"calibration/coverage@0%": 0.013291955052568971,
"calibration/coverage@1%": 0.013291955052568971,
"calibration/coverage@10%": 0.1392352647394574,
"calibration/coverage@15%": 0.22447254458290167,
"calibration/coverage@20%": 0.2971639704683243,
"calibration/coverage@25%": 0.3264822495779134,
"calibration/coverage@30%": 0.37885963460726757,
"calibration/coverage@5%": 0.07782152330110126,
"calibration/ece": 0.14755187827953356,
"calibration/mean_confidence": 0.4364244980707899,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 723.6,
"completions/max_terminated_length": 723.6,
"completions/mean_length": 179.4115234375,
"completions/mean_terminated_length": 179.4979736328125,
"completions/min_length": 36.0,
"completions/min_terminated_length": 76.2,
"epoch": 0.384,
"grad_norm": 0.0009319792152382433,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 399212403.0,
"reward": 0.9921321511268616,
"reward_std": 0.08957361131906509,
"rewards/accuracy_reward": 0.52138671875,
"rewards/brier_reward": 0.7853901386260986,
"rewards/confidence_uniqueness_reward": 0.9487253665924072,
"rewards/format_reward": 0.99912109375,
"rewards/frontier_aurc_reward": -0.004573598969727755,
"rewards/frontier_coverage_1": 0.139671640843153,
"rewards/frontier_coverage_10": 0.13900703862309455,
"rewards/frontier_coverage_15": 0.1376990981400013,
"rewards/frontier_coverage_20": 0.13199115544557571,
"rewards/frontier_coverage_25": 0.10313726216554642,
"rewards/frontier_coverage_5": 0.139671640843153,
"rewards/frontier_ece_reward": 0.008268655464053153,
"signal/accuracy_reward/centered_abs_mean": 0.108905029296875,
"signal/accuracy_reward/group_std_mean": 0.14662058651447296,
"signal/accuracy_reward/group_zero_std_frac": 0.56875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0544525146484375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0544525146484375,
"signal/advantage_abs_mean": 0.06682626381516457,
"signal/advantage_pre_scale_abs_mean": 0.06682626381516457,
"signal/advantage_pre_scale_std": 0.11096349507570266,
"signal/advantage_std": 0.11096349507570266,
"signal/brier_reward/centered_abs_mean": 0.1534943848848343,
"signal/brier_reward/group_std_mean": 0.19769077599048615,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019186798110604288,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.019186798110604288,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02545735388994217,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03464719727635383,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031821692362427713,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031821692362427713,
"signal/format_reward/centered_abs_mean": 0.001702880859375,
"signal/format_reward/group_std_mean": 0.004971844423562288,
"signal/format_reward/group_zero_std_frac": 0.971875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004512353893369436,
"signal/frontier_aurc_reward/group_std_mean": 0.007486558333039284,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.077113016042858e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.077113016042858e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.202898108959198,
"signal/frontier_coverage_1/group_std_mean": 0.2634346067905426,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003631876036524773,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003631876036524773,
"signal/frontier_coverage_10/centered_abs_mean": 0.20246056616306304,
"signal/frontier_coverage_10/group_std_mean": 0.262888640165329,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003624043893069029,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003624043893069029,
"signal/frontier_coverage_15/centered_abs_mean": 0.1991115540266037,
"signal/frontier_coverage_15/group_std_mean": 0.2586476981639862,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003564096707850695,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003564096707850695,
"signal/frontier_coverage_20/centered_abs_mean": 0.1904287278652191,
"signal/frontier_coverage_20/group_std_mean": 0.24768714606761932,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003408674104139209,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003408674104139209,
"signal/frontier_coverage_25/centered_abs_mean": 0.14712706804275513,
"signal/frontier_coverage_25/group_std_mean": 0.1920635223388672,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026335744187235832,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026335744187235832,
"signal/frontier_coverage_5/centered_abs_mean": 0.202898108959198,
"signal/frontier_coverage_5/group_std_mean": 0.2634346067905426,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003631876036524773,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003631876036524773,
"signal/frontier_ece_reward/centered_abs_mean": 0.012122917175292968,
"signal/frontier_ece_reward/group_std_mean": 0.015267861634492874,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001515364646911621,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001515364646911621,
"step": 120
},
{
"calibration/aurc": 0.42828427297185934,
"calibration/batch_distribution_entropy": 0.9521744849755738,
"calibration/buffer_distribution_entropy": 0.9448504540785763,
"calibration/confidence_entropy": 0.42884726577574755,
"calibration/coverage@0%": 0.0007827788649706457,
"calibration/coverage@1%": 0.0007827788649706457,
"calibration/coverage@10%": 0.0007827788649706457,
"calibration/coverage@15%": 0.0007827788649706457,
"calibration/coverage@20%": 0.023075159001956946,
"calibration/coverage@25%": 0.060643193493150685,
"calibration/coverage@30%": 0.17374709515655576,
"calibration/coverage@5%": 0.0007827788649706457,
"calibration/ece": 0.20050038966104805,
"calibration/mean_confidence": 0.4933116642167465,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 521.6,
"completions/max_terminated_length": 521.6,
"completions/mean_length": 180.09287109375,
"completions/mean_terminated_length": 180.1466857910156,
"completions/min_length": 52.4,
"completions/min_terminated_length": 86.8,
"epoch": 0.4,
"grad_norm": 0.0011204322800040245,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 416093002.0,
"reward": 0.9779730916023255,
"reward_std": 0.09525720775127411,
"rewards/accuracy_reward": 0.49853515625,
"rewards/brier_reward": 0.7649884939193725,
"rewards/confidence_uniqueness_reward": 0.9499655842781067,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0066272450610995294,
"rewards/frontier_coverage_1": 0.13427471909672023,
"rewards/frontier_coverage_10": 0.1341161746531725,
"rewards/frontier_coverage_15": 0.13308571912348272,
"rewards/frontier_coverage_20": 0.12990262992680074,
"rewards/frontier_coverage_25": 0.10391272101551294,
"rewards/frontier_coverage_5": 0.13427471909672023,
"rewards/frontier_ece_reward": 0.0069994966499507425,
"signal/accuracy_reward/centered_abs_mean": 0.119293212890625,
"signal/accuracy_reward/group_std_mean": 0.1583190679550171,
"signal/accuracy_reward/group_zero_std_frac": 0.54375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0596466064453125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0596466064453125,
"signal/advantage_abs_mean": 0.07242369055747985,
"signal/advantage_pre_scale_abs_mean": 0.07242369055747985,
"signal/advantage_pre_scale_std": 0.11832114905118943,
"signal/advantage_std": 0.11832114905118943,
"signal/brier_reward/centered_abs_mean": 0.16406248211860658,
"signal/brier_reward/group_std_mean": 0.20769093930721283,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020507810264825822,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020507810264825822,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02512081600725651,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03330396898090839,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031401020009070636,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031401020009070636,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.006808020174503326,
"signal/frontier_aurc_reward/group_std_mean": 0.011628658324480057,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00012186356034362688,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00012186356034362688,
"signal/frontier_coverage_1/centered_abs_mean": 0.20207859575748444,
"signal/frontier_coverage_1/group_std_mean": 0.2614697962999344,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036172067746520044,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036172067746520044,
"signal/frontier_coverage_10/centered_abs_mean": 0.20164523124694825,
"signal/frontier_coverage_10/group_std_mean": 0.26091606318950655,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036094495560973884,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036094495560973884,
"signal/frontier_coverage_15/centered_abs_mean": 0.20034250617027283,
"signal/frontier_coverage_15/group_std_mean": 0.2592529833316803,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00358613058924675,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00358613058924675,
"signal/frontier_coverage_20/centered_abs_mean": 0.1943148672580719,
"signal/frontier_coverage_20/group_std_mean": 0.2516128808259964,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034782360307872295,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034782360307872295,
"signal/frontier_coverage_25/centered_abs_mean": 0.1490813046693802,
"signal/frontier_coverage_25/group_std_mean": 0.19377623796463012,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002668555360287428,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002668555360287428,
"signal/frontier_coverage_5/centered_abs_mean": 0.20207859575748444,
"signal/frontier_coverage_5/group_std_mean": 0.2614697962999344,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036172067746520044,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036172067746520044,
"signal/frontier_ece_reward/centered_abs_mean": 0.012414365261793136,
"signal/frontier_ece_reward/group_std_mean": 0.015568001568317414,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001551795657724142,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001551795657724142,
"step": 125
},
{
"calibration/aurc": 0.3370713188856328,
"calibration/batch_distribution_entropy": 0.9547285499149123,
"calibration/buffer_distribution_entropy": 0.9601620068322163,
"calibration/confidence_entropy": 0.4410863910308821,
"calibration/coverage@0%": 0.00078125,
"calibration/coverage@1%": 0.00078125,
"calibration/coverage@10%": 0.00078125,
"calibration/coverage@15%": 0.01015625,
"calibration/coverage@20%": 0.0734375,
"calibration/coverage@25%": 0.2078125,
"calibration/coverage@30%": 0.441796875,
"calibration/coverage@5%": 0.00078125,
"calibration/ece": 0.14334507187353746,
"calibration/mean_confidence": 0.5274385731135967,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 597.4,
"completions/max_terminated_length": 597.4,
"completions/mean_length": 179.6830078125,
"completions/mean_terminated_length": 179.70028991699218,
"completions/min_length": 69.2,
"completions/min_terminated_length": 84.0,
"epoch": 0.416,
"grad_norm": 0.0011716982116922736,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 432814140.0,
"reward": 0.9867512822151184,
"reward_std": 0.0903295949101448,
"rewards/accuracy_reward": 0.5125,
"rewards/brier_reward": 0.7757958054542542,
"rewards/confidence_uniqueness_reward": 0.9511458039283752,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0052760880906134846,
"rewards/frontier_coverage_1": 0.13428325802087784,
"rewards/frontier_coverage_10": 0.13398997932672502,
"rewards/frontier_coverage_15": 0.13301783949136733,
"rewards/frontier_coverage_20": 0.13087771385908126,
"rewards/frontier_coverage_25": 0.10962761044502259,
"rewards/frontier_coverage_5": 0.13428325802087784,
"rewards/frontier_ece_reward": 0.007861407473683357,
"signal/accuracy_reward/centered_abs_mean": 0.11356201171875,
"signal/accuracy_reward/group_std_mean": 0.14841245412826537,
"signal/accuracy_reward/group_zero_std_frac": 0.584375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.056781005859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.056781005859375,
"signal/advantage_abs_mean": 0.06997922211885452,
"signal/advantage_pre_scale_abs_mean": 0.06997922211885452,
"signal/advantage_pre_scale_std": 0.11359207183122635,
"signal/advantage_std": 0.11359207183122635,
"signal/brier_reward/centered_abs_mean": 0.1620676189661026,
"signal/brier_reward/group_std_mean": 0.20438670516014099,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020258452370762826,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020258452370762826,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025409137457609178,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03423595391213894,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031761421822011473,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031761421822011473,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.005544650834053755,
"signal/frontier_aurc_reward/group_std_mean": 0.009931431245058775,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.92492467048578e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.92492467048578e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.20947245955467225,
"signal/frontier_coverage_1/group_std_mean": 0.2671071171760559,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037495568860322235,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037495568860322235,
"signal/frontier_coverage_10/centered_abs_mean": 0.20926251411437988,
"signal/frontier_coverage_10/group_std_mean": 0.26683722734451293,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003745798906311393,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003745798906311393,
"signal/frontier_coverage_15/centered_abs_mean": 0.20678186416625977,
"signal/frontier_coverage_15/group_std_mean": 0.2637243688106537,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003701395262032747,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003701395262032747,
"signal/frontier_coverage_20/centered_abs_mean": 0.20288313031196595,
"signal/frontier_coverage_20/group_std_mean": 0.2586935132741928,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036316079553216697,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036316079553216697,
"signal/frontier_coverage_25/centered_abs_mean": 0.16764808297157288,
"signal/frontier_coverage_25/group_std_mean": 0.21396067142486572,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030009005218744277,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030009005218744277,
"signal/frontier_coverage_5/centered_abs_mean": 0.20947245955467225,
"signal/frontier_coverage_5/group_std_mean": 0.2671071171760559,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037495568860322235,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037495568860322235,
"signal/frontier_ece_reward/centered_abs_mean": 0.012680101022124291,
"signal/frontier_ece_reward/group_std_mean": 0.015965329110622407,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015850126277655364,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015850126277655364,
"step": 130
},
{
"calibration/aurc": 0.2666005316559734,
"calibration/batch_distribution_entropy": 0.9318495853740112,
"calibration/buffer_distribution_entropy": 0.9668415393199925,
"calibration/confidence_entropy": 0.40613918812597366,
"calibration/coverage@0%": 0.011332720588235295,
"calibration/coverage@1%": 0.011332720588235295,
"calibration/coverage@10%": 0.14793964460784315,
"calibration/coverage@15%": 0.222265625,
"calibration/coverage@20%": 0.3520373774509804,
"calibration/coverage@25%": 0.4407797181372549,
"calibration/coverage@30%": 0.5138848039215687,
"calibration/coverage@5%": 0.08456954656862745,
"calibration/ece": 0.12372510080618797,
"calibration/mean_confidence": 0.5410209922965908,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 518.8,
"completions/max_terminated_length": 518.8,
"completions/mean_length": 179.09892578125,
"completions/mean_terminated_length": 179.1511962890625,
"completions/min_length": 53.4,
"completions/min_terminated_length": 87.0,
"epoch": 0.432,
"grad_norm": 0.001560159376822412,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 449662449.0,
"reward": 1.0048672199249267,
"reward_std": 0.08422506302595138,
"rewards/accuracy_reward": 0.5451171875,
"rewards/brier_reward": 0.7950255751609803,
"rewards/confidence_uniqueness_reward": 0.9479985952377319,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.003988235350698232,
"rewards/frontier_coverage_1": 0.1352456420660019,
"rewards/frontier_coverage_10": 0.13473829627037048,
"rewards/frontier_coverage_15": 0.13408846855163575,
"rewards/frontier_coverage_20": 0.1241634801030159,
"rewards/frontier_coverage_25": 0.09736895710229873,
"rewards/frontier_coverage_5": 0.1352456420660019,
"rewards/frontier_ece_reward": 0.008624885324388742,
"signal/accuracy_reward/centered_abs_mean": 0.1138427734375,
"signal/accuracy_reward/group_std_mean": 0.145708866417408,
"signal/accuracy_reward/group_zero_std_frac": 0.596875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05692138671875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05692138671875,
"signal/advantage_abs_mean": 0.0648413248360157,
"signal/advantage_pre_scale_abs_mean": 0.0648413248360157,
"signal/advantage_pre_scale_std": 0.10934355407953263,
"signal/advantage_std": 0.10934355407953263,
"signal/brier_reward/centered_abs_mean": 0.14669396579265595,
"signal/brier_reward/group_std_mean": 0.188351371884346,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018336745724081994,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.018336745724081994,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027258436009287835,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03615141212940216,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034073045011609794,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034073045011609794,
"signal/format_reward/centered_abs_mean": 0.00074462890625,
"signal/format_reward/group_std_mean": 0.0018734002020210027,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004199815262109041,
"signal/frontier_aurc_reward/group_std_mean": 0.007237010449171066,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.517668855143711e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.517668855143711e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1969534784555435,
"signal/frontier_coverage_1/group_std_mean": 0.25218563675880434,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035254672169685365,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035254672169685365,
"signal/frontier_coverage_10/centered_abs_mean": 0.19580358266830444,
"signal/frontier_coverage_10/group_std_mean": 0.250775471329689,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003504884196445346,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003504884196445346,
"signal/frontier_coverage_15/centered_abs_mean": 0.19471007883548735,
"signal/frontier_coverage_15/group_std_mean": 0.24942584335803986,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034853102173656226,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034853102173656226,
"signal/frontier_coverage_20/centered_abs_mean": 0.1738467276096344,
"signal/frontier_coverage_20/group_std_mean": 0.22313897609710692,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031118562910705805,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031118562910705805,
"signal/frontier_coverage_25/centered_abs_mean": 0.12515972554683685,
"signal/frontier_coverage_25/group_std_mean": 0.1612490564584732,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002240359038114548,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002240359038114548,
"signal/frontier_coverage_5/centered_abs_mean": 0.1969534784555435,
"signal/frontier_coverage_5/group_std_mean": 0.25218563675880434,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035254672169685365,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035254672169685365,
"signal/frontier_ece_reward/centered_abs_mean": 0.010893443413078786,
"signal/frontier_ece_reward/group_std_mean": 0.013623958081007003,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013616804266348482,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013616804266348482,
"step": 135
},
{
"calibration/aurc": 0.28631712868924974,
"calibration/batch_distribution_entropy": 0.9376599834930751,
"calibration/buffer_distribution_entropy": 0.9493486637369634,
"calibration/confidence_entropy": 0.44111615013169925,
"calibration/coverage@0%": 0.019140625,
"calibration/coverage@1%": 0.019140625,
"calibration/coverage@10%": 0.065625,
"calibration/coverage@15%": 0.144140625,
"calibration/coverage@20%": 0.231640625,
"calibration/coverage@25%": 0.321484375,
"calibration/coverage@30%": 0.484765625,
"calibration/coverage@5%": 0.033984375,
"calibration/ece": 0.13180397959229423,
"calibration/mean_confidence": 0.580050920506974,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 663.4,
"completions/max_terminated_length": 663.4,
"completions/mean_length": 187.68251953125,
"completions/mean_terminated_length": 187.75640258789062,
"completions/min_length": 37.0,
"completions/min_terminated_length": 91.2,
"epoch": 0.448,
"grad_norm": 0.0009328220039606094,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 466537118.0,
"reward": 0.9896220684051513,
"reward_std": 0.08325262814760208,
"rewards/accuracy_reward": 0.5126953125,
"rewards/brier_reward": 0.7939270973205567,
"rewards/confidence_uniqueness_reward": 0.9511842250823974,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.005059566115960479,
"rewards/frontier_coverage_1": 0.15485288202762604,
"rewards/frontier_coverage_10": 0.15278972536325455,
"rewards/frontier_coverage_15": 0.1487317442893982,
"rewards/frontier_coverage_20": 0.12109616249799729,
"rewards/frontier_coverage_25": 0.08842454105615616,
"rewards/frontier_coverage_5": 0.15470036566257478,
"rewards/frontier_ece_reward": 0.006252631358802318,
"signal/accuracy_reward/centered_abs_mean": 0.1038818359375,
"signal/accuracy_reward/group_std_mean": 0.13524862825870515,
"signal/accuracy_reward/group_zero_std_frac": 0.615625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05194091796875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05194091796875,
"signal/advantage_abs_mean": 0.0638619989156723,
"signal/advantage_pre_scale_abs_mean": 0.0638619989156723,
"signal/advantage_pre_scale_std": 0.10800794214010238,
"signal/advantage_std": 0.10800794214010238,
"signal/brier_reward/centered_abs_mean": 0.1454971045255661,
"signal/brier_reward/group_std_mean": 0.18698457777500152,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018187138065695763,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.018187138065695763,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025177285075187683,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03345710225403309,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031471606343984604,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031471606343984604,
"signal/format_reward/centered_abs_mean": 0.000933837890625,
"signal/format_reward/group_std_mean": 0.0024258273653686045,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004822453670203686,
"signal/frontier_aurc_reward/group_std_mean": 0.008479619119316339,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.6321918934118e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.6321918934118e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.191304749250412,
"signal/frontier_coverage_1/group_std_mean": 0.24523731768131257,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003424354922026396,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003424354922026396,
"signal/frontier_coverage_10/centered_abs_mean": 0.18812133073806764,
"signal/frontier_coverage_10/group_std_mean": 0.2412860572338104,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003367371670901775,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003367371670901775,
"signal/frontier_coverage_15/centered_abs_mean": 0.1821784794330597,
"signal/frontier_coverage_15/group_std_mean": 0.23390767574310303,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032609947957098484,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032609947957098484,
"signal/frontier_coverage_20/centered_abs_mean": 0.14254549741744996,
"signal/frontier_coverage_20/group_std_mean": 0.18396810591220855,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025515642948448656,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025515642948448656,
"signal/frontier_coverage_25/centered_abs_mean": 0.0979493647813797,
"signal/frontier_coverage_25/group_std_mean": 0.12607679516077042,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017532935133203864,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017532935133203864,
"signal/frontier_coverage_5/centered_abs_mean": 0.19090119898319244,
"signal/frontier_coverage_5/group_std_mean": 0.24473724365234376,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003417131397873163,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003417131397873163,
"signal/frontier_ece_reward/centered_abs_mean": 0.009049840457737445,
"signal/frontier_ece_reward/group_std_mean": 0.011317284591495991,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011312300572171806,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011312300572171806,
"step": 140
},
{
"calibration/aurc": 0.4035057068894109,
"calibration/batch_distribution_entropy": 0.9632269579251304,
"calibration/buffer_distribution_entropy": 0.9562007001272885,
"calibration/confidence_entropy": 0.4472620321444971,
"calibration/coverage@0%": 0.0031272932974559685,
"calibration/coverage@1%": 0.0031272932974559685,
"calibration/coverage@10%": 0.006649798189823875,
"calibration/coverage@15%": 0.06261848703522505,
"calibration/coverage@20%": 0.10801660347358122,
"calibration/coverage@25%": 0.2011642306751468,
"calibration/coverage@30%": 0.2735674535225049,
"calibration/coverage@5%": 0.0031272932974559685,
"calibration/ece": 0.14442209397435643,
"calibration/mean_confidence": 0.4993456408420821,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 572.8,
"completions/max_terminated_length": 572.8,
"completions/mean_length": 193.72822265625,
"completions/mean_terminated_length": 193.8231964111328,
"completions/min_length": 18.0,
"completions/min_terminated_length": 88.4,
"epoch": 0.464,
"grad_norm": 0.0008522409480065107,
"learning_rate": 1e-06,
"loss": -0.0003,
"num_tokens": 483691711.0,
"reward": 0.9613773465156555,
"reward_std": 0.08025670200586318,
"rewards/accuracy_reward": 0.46025390625,
"rewards/brier_reward": 0.7679938077926636,
"rewards/confidence_uniqueness_reward": 0.9479737401008606,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.00637078620493412,
"rewards/frontier_coverage_1": 0.1669064313173294,
"rewards/frontier_coverage_10": 0.16667421758174897,
"rewards/frontier_coverage_15": 0.16271042451262474,
"rewards/frontier_coverage_20": 0.14486324191093444,
"rewards/frontier_coverage_25": 0.1091366857290268,
"rewards/frontier_coverage_5": 0.1669064313173294,
"rewards/frontier_ece_reward": 0.005558578902855516,
"signal/accuracy_reward/centered_abs_mean": 0.088775634765625,
"signal/accuracy_reward/group_std_mean": 0.12184955179691315,
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0443878173828125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0443878173828125,
"signal/advantage_abs_mean": 0.06064637377858162,
"signal/advantage_pre_scale_abs_mean": 0.06064637377858162,
"signal/advantage_pre_scale_std": 0.10379063338041306,
"signal/advantage_std": 0.10379063338041306,
"signal/brier_reward/centered_abs_mean": 0.14758062362670898,
"signal/brier_reward/group_std_mean": 0.18942284882068633,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018447577953338623,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.018447577953338623,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02740153931081295,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03665589839220047,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003425192413851619,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003425192413851619,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.005610701907426119,
"signal/frontier_aurc_reward/group_std_mean": 0.009466160088777542,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00010043156071333214,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00010043156071333214,
"signal/frontier_coverage_1/centered_abs_mean": 0.18335448503494262,
"signal/frontier_coverage_1/group_std_mean": 0.23882531821727754,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032820451073348523,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032820451073348523,
"signal/frontier_coverage_10/centered_abs_mean": 0.18226630091667176,
"signal/frontier_coverage_10/group_std_mean": 0.2374131292104721,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003262566588819027,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003262566588819027,
"signal/frontier_coverage_15/centered_abs_mean": 0.1758827894926071,
"signal/frontier_coverage_15/group_std_mean": 0.22919850349426268,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031483017839491366,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031483017839491366,
"signal/frontier_coverage_20/centered_abs_mean": 0.15302482694387437,
"signal/frontier_coverage_20/group_std_mean": 0.19992092549800872,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027391442097723486,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027391442097723486,
"signal/frontier_coverage_25/centered_abs_mean": 0.10996192842721939,
"signal/frontier_coverage_25/group_std_mean": 0.14274049401283265,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001968318480066955,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001968318480066955,
"signal/frontier_coverage_5/centered_abs_mean": 0.18335448503494262,
"signal/frontier_coverage_5/group_std_mean": 0.23882531821727754,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032820451073348523,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032820451073348523,
"signal/frontier_ece_reward/centered_abs_mean": 0.01014717761427164,
"signal/frontier_ece_reward/group_std_mean": 0.012974118441343307,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001268397201783955,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001268397201783955,
"step": 145
},
{
"calibration/aurc": 0.3002852335337244,
"calibration/batch_distribution_entropy": 0.9388510927290321,
"calibration/buffer_distribution_entropy": 0.9680216674851476,
"calibration/confidence_entropy": 0.42546727456308775,
"calibration/coverage@0%": 0.001953125,
"calibration/coverage@1%": 0.001953125,
"calibration/coverage@10%": 0.053125,
"calibration/coverage@15%": 0.075,
"calibration/coverage@20%": 0.31511695726915523,
"calibration/coverage@25%": 0.42848339268172886,
"calibration/coverage@30%": 0.494921875,
"calibration/coverage@5%": 0.025390625,
"calibration/ece": 0.15550245093764642,
"calibration/mean_confidence": 0.502191222376458,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0009765625,
"completions/max_length": 545.2,
"completions/max_terminated_length": 545.2,
"completions/mean_length": 194.89267578125,
"completions/mean_terminated_length": 195.0831512451172,
"completions/min_length": 37.0,
"completions/min_terminated_length": 95.4,
"epoch": 0.48,
"grad_norm": 0.0009571706177666783,
"learning_rate": 1e-06,
"loss": -0.0007,
"num_tokens": 500735444.0,
"reward": 0.9900978207588196,
"reward_std": 0.08839119076728821,
"rewards/accuracy_reward": 0.5193359375,
"rewards/brier_reward": 0.7768510937690735,
"rewards/confidence_uniqueness_reward": 0.9438364863395691,
"rewards/format_reward": 0.9990234375,
"rewards/frontier_aurc_reward": -0.005226324964314699,
"rewards/frontier_coverage_1": 0.14232488125562667,
"rewards/frontier_coverage_10": 0.14232488125562667,
"rewards/frontier_coverage_15": 0.14224071949720382,
"rewards/frontier_coverage_20": 0.1379440650343895,
"rewards/frontier_coverage_25": 0.12526220083236694,
"rewards/frontier_coverage_5": 0.14232488125562667,
"rewards/frontier_ece_reward": 0.008203021250665188,
"signal/accuracy_reward/centered_abs_mean": 0.12362060546875,
"signal/accuracy_reward/group_std_mean": 0.16150453984737395,
"signal/accuracy_reward/group_zero_std_frac": 0.540625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.061810302734375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.061810302734375,
"signal/advantage_abs_mean": 0.06626208722591401,
"signal/advantage_pre_scale_abs_mean": 0.06626208722591401,
"signal/advantage_pre_scale_std": 0.1121869832277298,
"signal/advantage_std": 0.1121869832277298,
"signal/brier_reward/centered_abs_mean": 0.14678598940372467,
"signal/brier_reward/group_std_mean": 0.18890305161476134,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018348248675465584,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.018348248675465584,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03009452819824219,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04022698849439621,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037618160247802736,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037618160247802736,
"signal/format_reward/centered_abs_mean": 0.00186767578125,
"signal/format_reward/group_std_mean": 0.004851654777303338,
"signal/format_reward/group_zero_std_frac": 0.975,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000933837890625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000933837890625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.005225532129406929,
"signal/frontier_aurc_reward/group_std_mean": 0.008696961030364037,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.353702189400792e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.353702189400792e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2020603150129318,
"signal/frontier_coverage_1/group_std_mean": 0.2614011108875275,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036168794613331556,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036168794613331556,
"signal/frontier_coverage_10/centered_abs_mean": 0.2020603150129318,
"signal/frontier_coverage_10/group_std_mean": 0.2614011108875275,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036168794613331556,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036168794613331556,
"signal/frontier_coverage_15/centered_abs_mean": 0.20190419256687164,
"signal/frontier_coverage_15/group_std_mean": 0.26119298338890073,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036140848882496358,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036140848882496358,
"signal/frontier_coverage_20/centered_abs_mean": 0.1944670557975769,
"signal/frontier_coverage_20/group_std_mean": 0.25180519819259645,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034809602424502374,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034809602424502374,
"signal/frontier_coverage_25/centered_abs_mean": 0.16764770150184632,
"signal/frontier_coverage_25/group_std_mean": 0.21782387495040895,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003000893723219633,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003000893723219633,
"signal/frontier_coverage_5/centered_abs_mean": 0.2020603150129318,
"signal/frontier_coverage_5/group_std_mean": 0.2614011108875275,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036168794613331556,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036168794613331556,
"signal/frontier_ece_reward/centered_abs_mean": 0.012095056474208832,
"signal/frontier_ece_reward/group_std_mean": 0.015245267003774644,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001511882059276104,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001511882059276104,
"step": 150
},
{
"epoch": 0.48,
"eval_calibration/aurc": 0.47679932559143284,
"eval_calibration/batch_distribution_entropy": 0.9013542416014595,
"eval_calibration/buffer_distribution_entropy": 0.9639674509586168,
"eval_calibration/confidence_entropy": 0.43267242994441507,
"eval_calibration/coverage@0%": 0.0546875,
"eval_calibration/coverage@1%": 0.0546875,
"eval_calibration/coverage@10%": 0.0546875,
"eval_calibration/coverage@15%": 0.0625,
"eval_calibration/coverage@20%": 0.0625,
"eval_calibration/coverage@25%": 0.1328125,
"eval_calibration/coverage@30%": 0.2421875,
"eval_calibration/coverage@5%": 0.0546875,
"eval_calibration/ece": 0.21963807524218745,
"eval_calibration/mean_confidence": 0.4872943252421874,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 409.75,
"eval_completions/max_terminated_length": 409.75,
"eval_completions/mean_length": 198.76845169067383,
"eval_completions/mean_terminated_length": 198.76845169067383,
"eval_completions/min_length": 106.0,
"eval_completions/min_terminated_length": 106.0,
"eval_loss": 0.0,
"eval_num_tokens": 500735444.0,
"eval_reward": 0.949889749288559,
"eval_reward_std": 0.2238633930683136,
"eval_rewards/accuracy_reward": 0.427734375,
"eval_rewards/brier_reward": 0.7985336780548096,
"eval_rewards/confidence_uniqueness_reward": 0.894775390625,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0058674311731010675,
"eval_rewards/frontier_coverage_1": 0.22058681026101112,
"eval_rewards/frontier_coverage_10": 0.22058681026101112,
"eval_rewards/frontier_coverage_15": 0.22058681026101112,
"eval_rewards/frontier_coverage_20": 0.21922587975859642,
"eval_rewards/frontier_coverage_25": 0.199382446706295,
"eval_rewards/frontier_coverage_5": 0.22058681026101112,
"eval_rewards/frontier_ece_reward": 0.009414957254193723,
"eval_runtime": 20.5893,
"eval_samples_per_second": 24.284,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4769287109375,
"eval_signal/accuracy_reward/group_std_mean": 0.4958357736468315,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23846435546875,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23846435546875,
"eval_signal/advantage_abs_mean": 0.20569245144724846,
"eval_signal/advantage_pre_scale_abs_mean": 0.20569245144724846,
"eval_signal/advantage_pre_scale_std": 0.22155210003256798,
"eval_signal/advantage_std": 0.22155210003256798,
"eval_signal/brier_reward/centered_abs_mean": 0.20953208208084106,
"eval_signal/brier_reward/group_std_mean": 0.2654327526688576,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026191510260105133,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.026191510260105133,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04766845703125,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.056043313816189766,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00595855712890625,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00595855712890625,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.007251660223118961,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.013555924640968442,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0001298047136515379,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0001298047136515379,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3865165114402771,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4718915820121765,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006918645463883877,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006918645463883877,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3865165114402771,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4718915820121765,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006918645463883877,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006918645463883877,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3865165114402771,
"eval_signal/frontier_coverage_15/group_std_mean": 0.4718915820121765,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006918645463883877,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006918645463883877,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3829868882894516,
"eval_signal/frontier_coverage_20/group_std_mean": 0.4677583575248718,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006855465122498572,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006855465122498572,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.34789665043354034,
"eval_signal/frontier_coverage_25/group_std_mean": 0.426948219537735,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006227349746041,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006227349746041,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3865165114402771,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4718915820121765,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006918645463883877,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006918645463883877,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.016906759701669216,
"eval_signal/frontier_ece_reward/group_std_mean": 0.021898964885622263,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002113344962708652,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002113344962708652,
"eval_steps_per_second": 0.194,
"step": 150
},
{
"calibration/aurc": 0.3954625163677827,
"calibration/batch_distribution_entropy": 0.9514147559853872,
"calibration/buffer_distribution_entropy": 0.9573486324097511,
"calibration/confidence_entropy": 0.4338447612841179,
"calibration/coverage@0%": 0.012128485812133072,
"calibration/coverage@1%": 0.012128485812133072,
"calibration/coverage@10%": 0.09079393957925636,
"calibration/coverage@15%": 0.13148620963796478,
"calibration/coverage@20%": 0.17995275807240704,
"calibration/coverage@25%": 0.2045934748043053,
"calibration/coverage@30%": 0.26518927348336596,
"calibration/coverage@5%": 0.012128485812133072,
"calibration/ece": 0.142788524554626,
"calibration/mean_confidence": 0.5134478515261864,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 468.8,
"completions/max_terminated_length": 468.8,
"completions/mean_length": 196.383984375,
"completions/mean_terminated_length": 196.49932861328125,
"completions/min_length": 37.4,
"completions/min_terminated_length": 93.4,
"epoch": 0.496,
"grad_norm": 0.0009579082252457738,
"learning_rate": 1e-06,
"loss": -0.0003,
"num_tokens": 518054256.0,
"reward": 1.0009922742843629,
"reward_std": 0.08279297351837159,
"rewards/accuracy_reward": 0.54296875,
"rewards/brier_reward": 0.7803852081298828,
"rewards/confidence_uniqueness_reward": 0.9533098340034485,
"rewards/format_reward": 0.99921875,
"rewards/frontier_aurc_reward": -0.004918352467939258,
"rewards/frontier_coverage_1": 0.11716257035732269,
"rewards/frontier_coverage_10": 0.11720968186855316,
"rewards/frontier_coverage_15": 0.11690075695514679,
"rewards/frontier_coverage_20": 0.11612895727157593,
"rewards/frontier_coverage_25": 0.10760040432214737,
"rewards/frontier_coverage_5": 0.11716257035732269,
"rewards/frontier_ece_reward": 0.0070794297382235525,
"signal/accuracy_reward/centered_abs_mean": 0.09886474609375,
"signal/accuracy_reward/group_std_mean": 0.13347659707069398,
"signal/accuracy_reward/group_zero_std_frac": 0.60625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049432373046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049432373046875,
"signal/advantage_abs_mean": 0.06259298101067542,
"signal/advantage_pre_scale_abs_mean": 0.06259298101067542,
"signal/advantage_pre_scale_std": 0.1071514829993248,
"signal/advantage_std": 0.1071514829993248,
"signal/brier_reward/centered_abs_mean": 0.14288402795791627,
"signal/brier_reward/group_std_mean": 0.18427936732769012,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017860503494739534,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017860503494739534,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022854289039969444,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030779007449746133,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028567861299961805,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028567861299961805,
"signal/format_reward/centered_abs_mean": 0.00147705078125,
"signal/format_reward/group_std_mean": 0.0036875875666737556,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000738525390625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000738525390625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0049670317210257055,
"signal/frontier_aurc_reward/group_std_mean": 0.00850085811689496,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.89098650077358e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.89098650077358e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1791763871908188,
"signal/frontier_coverage_1/group_std_mean": 0.23500295877456664,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003207257157191634,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003207257157191634,
"signal/frontier_coverage_10/centered_abs_mean": 0.17878322303295135,
"signal/frontier_coverage_10/group_std_mean": 0.23450087904930114,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032002195250242947,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032002195250242947,
"signal/frontier_coverage_15/centered_abs_mean": 0.17793179750442506,
"signal/frontier_coverage_15/group_std_mean": 0.2333335369825363,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031849790364503862,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031849790364503862,
"signal/frontier_coverage_20/centered_abs_mean": 0.17106520533561706,
"signal/frontier_coverage_20/group_std_mean": 0.22465102672576903,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003062067087739706,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003062067087739706,
"signal/frontier_coverage_25/centered_abs_mean": 0.1335703030228615,
"signal/frontier_coverage_25/group_std_mean": 0.17600546777248383,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023909082636237146,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023909082636237146,
"signal/frontier_coverage_5/centered_abs_mean": 0.1791763871908188,
"signal/frontier_coverage_5/group_std_mean": 0.23500295877456664,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003207257157191634,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003207257157191634,
"signal/frontier_ece_reward/centered_abs_mean": 0.011782016232609748,
"signal/frontier_ece_reward/group_std_mean": 0.015151255577802659,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014727520290762186,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014727520290762186,
"step": 155
},
{
"calibration/aurc": 0.3234285276956931,
"calibration/batch_distribution_entropy": 0.9557808515580103,
"calibration/buffer_distribution_entropy": 0.9580465637036951,
"calibration/confidence_entropy": 0.45395630618831184,
"calibration/coverage@0%": 0.04305207314090019,
"calibration/coverage@1%": 0.04618318860078278,
"calibration/coverage@10%": 0.13815970523483365,
"calibration/coverage@15%": 0.24603794642857144,
"calibration/coverage@20%": 0.3226294948630137,
"calibration/coverage@25%": 0.361328125,
"calibration/coverage@30%": 0.476953125,
"calibration/coverage@5%": 0.06457849192759295,
"calibration/ece": 0.1503633172886159,
"calibration/mean_confidence": 0.5117095801549719,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 610.4,
"completions/max_terminated_length": 610.4,
"completions/mean_length": 192.4884765625,
"completions/mean_terminated_length": 192.58353271484376,
"completions/min_length": 37.6,
"completions/min_terminated_length": 95.4,
"epoch": 0.512,
"grad_norm": 0.0009406897588633001,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 535171002.0,
"reward": 1.0028786063194275,
"reward_std": 0.08359881192445755,
"rewards/accuracy_reward": 0.54052734375,
"rewards/brier_reward": 0.7988176941871643,
"rewards/confidence_uniqueness_reward": 0.9542840957641602,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.0045342623256146904,
"rewards/frontier_coverage_1": 0.13509078472852706,
"rewards/frontier_coverage_10": 0.13377612084150314,
"rewards/frontier_coverage_15": 0.131293123960495,
"rewards/frontier_coverage_20": 0.10712225437164306,
"rewards/frontier_coverage_25": 0.08412986695766449,
"rewards/frontier_coverage_5": 0.13509078472852706,
"rewards/frontier_ece_reward": 0.006775648565962911,
"signal/accuracy_reward/centered_abs_mean": 0.103765869140625,
"signal/accuracy_reward/group_std_mean": 0.13954708576202393,
"signal/accuracy_reward/group_zero_std_frac": 0.6,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0518829345703125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0518829345703125,
"signal/advantage_abs_mean": 0.06277187541127205,
"signal/advantage_pre_scale_abs_mean": 0.06277187541127205,
"signal/advantage_pre_scale_std": 0.11084946841001511,
"signal/advantage_std": 0.11084946841001511,
"signal/brier_reward/centered_abs_mean": 0.1327110230922699,
"signal/brier_reward/group_std_mean": 0.17267618775367738,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016588877886533737,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.016588877886533737,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021851665526628494,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02941979803144932,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027314581908285617,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027314581908285617,
"signal/format_reward/centered_abs_mean": 0.00113525390625,
"signal/format_reward/group_std_mean": 0.0033145629800856113,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004577672528102994,
"signal/frontier_aurc_reward/group_std_mean": 0.008000782225281,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.194033871404827e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.194033871404827e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16614727675914764,
"signal/frontier_coverage_1/group_std_mean": 0.22104130387306214,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029740359634160994,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029740359634160994,
"signal/frontier_coverage_10/centered_abs_mean": 0.16378251910209657,
"signal/frontier_coverage_10/group_std_mean": 0.217928346991539,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029317068867385386,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029317068867385386,
"signal/frontier_coverage_15/centered_abs_mean": 0.15970734059810637,
"signal/frontier_coverage_15/group_std_mean": 0.21259380280971527,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002858761325478554,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002858761325478554,
"signal/frontier_coverage_20/centered_abs_mean": 0.12538450360298156,
"signal/frontier_coverage_20/group_std_mean": 0.1680024266242981,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022443826077505947,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022443826077505947,
"signal/frontier_coverage_25/centered_abs_mean": 0.09285377115011215,
"signal/frontier_coverage_25/group_std_mean": 0.12264609932899476,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016620824113488196,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016620824113488196,
"signal/frontier_coverage_5/centered_abs_mean": 0.16614727675914764,
"signal/frontier_coverage_5/group_std_mean": 0.22104130387306214,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029740359634160994,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029740359634160994,
"signal/frontier_ece_reward/centered_abs_mean": 0.009613732434809208,
"signal/frontier_ece_reward/group_std_mean": 0.01279524564743042,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001201716554351151,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001201716554351151,
"step": 160
},
{
"calibration/aurc": 0.21515431340038269,
"calibration/batch_distribution_entropy": 0.9465215933514806,
"calibration/buffer_distribution_entropy": 0.9642959605124626,
"calibration/confidence_entropy": 0.43351707967556574,
"calibration/coverage@0%": 0.010546875,
"calibration/coverage@1%": 0.010546875,
"calibration/coverage@10%": 0.233984375,
"calibration/coverage@15%": 0.381640625,
"calibration/coverage@20%": 0.526953125,
"calibration/coverage@25%": 0.6390625,
"calibration/coverage@30%": 0.74140625,
"calibration/coverage@5%": 0.108203125,
"calibration/ece": 0.11720595065823454,
"calibration/mean_confidence": 0.5373770825359314,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 512.0,
"completions/max_terminated_length": 512.0,
"completions/mean_length": 188.80302734375,
"completions/mean_terminated_length": 188.8581787109375,
"completions/min_length": 55.0,
"completions/min_terminated_length": 90.8,
"epoch": 0.528,
"grad_norm": 0.0008741988567635417,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 552133881.0,
"reward": 1.0039166688919068,
"reward_std": 0.08170044273138047,
"rewards/accuracy_reward": 0.53916015625,
"rewards/brier_reward": 0.8085092425346374,
"rewards/confidence_uniqueness_reward": 0.9507844090461731,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.004248648602515459,
"rewards/frontier_coverage_1": 0.15175041258335115,
"rewards/frontier_coverage_10": 0.14860184788703917,
"rewards/frontier_coverage_15": 0.13345287144184112,
"rewards/frontier_coverage_20": 0.11305398046970368,
"rewards/frontier_coverage_25": 0.08651100248098373,
"rewards/frontier_coverage_5": 0.15175041258335115,
"rewards/frontier_ece_reward": 0.005140899121761322,
"signal/accuracy_reward/centered_abs_mean": 0.107843017578125,
"signal/accuracy_reward/group_std_mean": 0.1392485275864601,
"signal/accuracy_reward/group_zero_std_frac": 0.615625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0539215087890625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0539215087890625,
"signal/advantage_abs_mean": 0.06308979764580727,
"signal/advantage_pre_scale_abs_mean": 0.06308979764580727,
"signal/advantage_pre_scale_std": 0.11193345636129379,
"signal/advantage_std": 0.11193345636129379,
"signal/brier_reward/centered_abs_mean": 0.13161712884902954,
"signal/brier_reward/group_std_mean": 0.16742828488349915,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016452141106128693,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.016452141106128693,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023607824370265006,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03156716227531433,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029509780462831257,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029509780462831257,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004438076773658395,
"signal/frontier_aurc_reward/group_std_mean": 0.007885826658457518,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.9441572597716e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.9441572597716e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17143626213073732,
"signal/frontier_coverage_1/group_std_mean": 0.22192691266536713,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030687090009450914,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030687090009450914,
"signal/frontier_coverage_10/centered_abs_mean": 0.16864987313747407,
"signal/frontier_coverage_10/group_std_mean": 0.21828924119472504,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030188326723873614,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030188326723873614,
"signal/frontier_coverage_15/centered_abs_mean": 0.14959897696971894,
"signal/frontier_coverage_15/group_std_mean": 0.19355920553207398,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026778215542435646,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026778215542435646,
"signal/frontier_coverage_20/centered_abs_mean": 0.12214481383562088,
"signal/frontier_coverage_20/group_std_mean": 0.1572314739227295,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002186392107978463,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002186392107978463,
"signal/frontier_coverage_25/centered_abs_mean": 0.08615210205316544,
"signal/frontier_coverage_25/group_std_mean": 0.10980434715747833,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015421226155012846,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015421226155012846,
"signal/frontier_coverage_5/centered_abs_mean": 0.17143626213073732,
"signal/frontier_coverage_5/group_std_mean": 0.22192691266536713,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030687090009450914,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030687090009450914,
"signal/frontier_ece_reward/centered_abs_mean": 0.006812410987913608,
"signal/frontier_ece_reward/group_std_mean": 0.008691665530204774,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000851551373489201,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000851551373489201,
"step": 165
},
{
"calibration/aurc": 0.23020906280908213,
"calibration/batch_distribution_entropy": 0.918057457167898,
"calibration/buffer_distribution_entropy": 0.9609664227706617,
"calibration/confidence_entropy": 0.4125372809498028,
"calibration/coverage@0%": 0.02421875,
"calibration/coverage@1%": 0.02421875,
"calibration/coverage@10%": 0.20859375,
"calibration/coverage@15%": 0.36484375,
"calibration/coverage@20%": 0.486328125,
"calibration/coverage@25%": 0.605078125,
"calibration/coverage@30%": 0.708984375,
"calibration/coverage@5%": 0.058203125,
"calibration/ece": 0.076503771808841,
"calibration/mean_confidence": 0.5710803343088411,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 553.0,
"completions/max_terminated_length": 553.0,
"completions/mean_length": 188.80380859375,
"completions/mean_terminated_length": 188.85922546386718,
"completions/min_length": 57.0,
"completions/min_terminated_length": 92.4,
"epoch": 0.544,
"grad_norm": 0.000987795414403081,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 569230816.0,
"reward": 1.0138700008392334,
"reward_std": 0.08416398167610169,
"rewards/accuracy_reward": 0.571484375,
"rewards/brier_reward": 0.7931686043739319,
"rewards/confidence_uniqueness_reward": 0.9540070772171021,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.004576193448156118,
"rewards/frontier_coverage_1": 0.10272571891546249,
"rewards/frontier_coverage_10": 0.09751921743154526,
"rewards/frontier_coverage_15": 0.08434878885746003,
"rewards/frontier_coverage_20": 0.07184197083115577,
"rewards/frontier_coverage_25": 0.07199315205216408,
"rewards/frontier_coverage_5": 0.10249073505401611,
"rewards/frontier_ece_reward": 0.0036463214084506033,
"signal/accuracy_reward/centered_abs_mean": 0.1113525390625,
"signal/accuracy_reward/group_std_mean": 0.14786846041679383,
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05567626953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05567626953125,
"signal/advantage_abs_mean": 0.0635548323392868,
"signal/advantage_pre_scale_abs_mean": 0.0635548323392868,
"signal/advantage_pre_scale_std": 0.11159455627202988,
"signal/advantage_std": 0.11159455627202988,
"signal/brier_reward/centered_abs_mean": 0.13828030824661255,
"signal/brier_reward/group_std_mean": 0.17761871218681335,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01728503853082657,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01728503853082657,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022624427825212477,
"signal/confidence_uniqueness_reward/group_std_mean": 0.029629110544919967,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028280534781515597,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028280534781515597,
"signal/format_reward/centered_abs_mean": 0.000555419921875,
"signal/format_reward/group_std_mean": 0.0013209730386734009,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004484716011211276,
"signal/frontier_aurc_reward/group_std_mean": 0.00775440065190196,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.027641015360132e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.027641015360132e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17472692728042602,
"signal/frontier_coverage_1/group_std_mean": 0.22890773713588713,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031276117544621227,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031276117544621227,
"signal/frontier_coverage_10/centered_abs_mean": 0.16465979516506196,
"signal/frontier_coverage_10/group_std_mean": 0.21564705669879913,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029474102426320314,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029474102426320314,
"signal/frontier_coverage_15/centered_abs_mean": 0.13918684124946595,
"signal/frontier_coverage_15/group_std_mean": 0.18255962431430817,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002491444256156683,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002491444256156683,
"signal/frontier_coverage_20/centered_abs_mean": 0.10881915837526321,
"signal/frontier_coverage_20/group_std_mean": 0.14249514639377595,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019478628411889077,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019478628411889077,
"signal/frontier_coverage_25/centered_abs_mean": 0.08776176124811172,
"signal/frontier_coverage_25/group_std_mean": 0.11297746896743774,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001570935477502644,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001570935477502644,
"signal/frontier_coverage_5/centered_abs_mean": 0.17419021725654601,
"signal/frontier_coverage_5/group_std_mean": 0.22820683419704438,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031180046498775482,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031180046498775482,
"signal/frontier_ece_reward/centered_abs_mean": 0.006010690052062273,
"signal/frontier_ece_reward/group_std_mean": 0.007854269072413444,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007513362565077841,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007513362565077841,
"step": 170
},
{
"calibration/aurc": 0.25932787789545503,
"calibration/batch_distribution_entropy": 0.951260038050609,
"calibration/buffer_distribution_entropy": 0.9453655904627768,
"calibration/confidence_entropy": 0.42789571607951987,
"calibration/coverage@0%": 0.038671875,
"calibration/coverage@1%": 0.058984375,
"calibration/coverage@10%": 0.25703125,
"calibration/coverage@15%": 0.330078125,
"calibration/coverage@20%": 0.380859375,
"calibration/coverage@25%": 0.5,
"calibration/coverage@30%": 0.5640625,
"calibration/coverage@5%": 0.16875,
"calibration/ece": 0.13416383694243011,
"calibration/mean_confidence": 0.5382109562135288,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 549.0,
"completions/max_terminated_length": 549.0,
"completions/mean_length": 187.33359375,
"completions/mean_terminated_length": 187.33359375,
"completions/min_length": 90.4,
"completions/min_terminated_length": 90.4,
"epoch": 0.56,
"grad_norm": 0.0008024996495805681,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 585970520.0,
"reward": 0.996568763256073,
"reward_std": 0.07741106897592545,
"rewards/accuracy_reward": 0.52548828125,
"rewards/brier_reward": 0.8031778693199157,
"rewards/confidence_uniqueness_reward": 0.9556594848632812,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0049142176285386086,
"rewards/frontier_coverage_1": 0.14636741876602172,
"rewards/frontier_coverage_10": 0.14227018058300017,
"rewards/frontier_coverage_15": 0.1258744031190872,
"rewards/frontier_coverage_20": 0.10139468014240265,
"rewards/frontier_coverage_25": 0.08633039444684983,
"rewards/frontier_coverage_5": 0.14596393704414368,
"rewards/frontier_ece_reward": 0.005320987664163113,
"signal/accuracy_reward/centered_abs_mean": 0.089776611328125,
"signal/accuracy_reward/group_std_mean": 0.12582256644964218,
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0448883056640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0448883056640625,
"signal/advantage_abs_mean": 0.05710288733243942,
"signal/advantage_pre_scale_abs_mean": 0.05710288733243942,
"signal/advantage_pre_scale_std": 0.1032014474272728,
"signal/advantage_std": 0.1032014474272728,
"signal/brier_reward/centered_abs_mean": 0.13279659748077394,
"signal/brier_reward/group_std_mean": 0.17281450629234313,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016599574685096742,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.016599574685096742,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019998049736022948,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0253103855997324,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024997562170028685,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024997562170028685,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0049385009333491325,
"signal/frontier_aurc_reward/group_std_mean": 0.00919720744714141,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.839916408760473e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.839916408760473e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16153999269008637,
"signal/frontier_coverage_1/group_std_mean": 0.21193089783191682,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028915658127516507,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028915658127516507,
"signal/frontier_coverage_10/centered_abs_mean": 0.15317419469356536,
"signal/frontier_coverage_10/group_std_mean": 0.2011122077703476,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002741818083450198,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002741818083450198,
"signal/frontier_coverage_15/centered_abs_mean": 0.12960880994796753,
"signal/frontier_coverage_15/group_std_mean": 0.17075131833553314,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023199975956231357,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023199975956231357,
"signal/frontier_coverage_20/centered_abs_mean": 0.09810810983181,
"signal/frontier_coverage_20/group_std_mean": 0.12899805158376693,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017561352113261818,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017561352113261818,
"signal/frontier_coverage_25/centered_abs_mean": 0.07935424745082856,
"signal/frontier_coverage_25/group_std_mean": 0.10310705602169037,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001420441037043929,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001420441037043929,
"signal/frontier_coverage_5/centered_abs_mean": 0.16019822359085084,
"signal/frontier_coverage_5/group_std_mean": 0.21020157337188722,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002867548214271665,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002867548214271665,
"signal/frontier_ece_reward/centered_abs_mean": 0.00652648089453578,
"signal/frontier_ece_reward/group_std_mean": 0.008323358558118343,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008158101118169725,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008158101118169725,
"step": 175
},
{
"calibration/aurc": 0.2958790568751456,
"calibration/batch_distribution_entropy": 0.9495256459575139,
"calibration/buffer_distribution_entropy": 0.951260865377282,
"calibration/confidence_entropy": 0.4454251218251891,
"calibration/coverage@0%": 0.041015625,
"calibration/coverage@1%": 0.041015625,
"calibration/coverage@10%": 0.11875,
"calibration/coverage@15%": 0.223046875,
"calibration/coverage@20%": 0.3078125,
"calibration/coverage@25%": 0.424609375,
"calibration/coverage@30%": 0.54453125,
"calibration/coverage@5%": 0.08046875,
"calibration/ece": 0.09598984374999998,
"calibration/mean_confidence": 0.53377890625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 522.2,
"completions/max_terminated_length": 522.2,
"completions/mean_length": 187.3328125,
"completions/mean_terminated_length": 187.388818359375,
"completions/min_length": 58.2,
"completions/min_terminated_length": 92.4,
"epoch": 0.576,
"grad_norm": 0.0008470589527860284,
"learning_rate": 1e-06,
"loss": -0.0,
"num_tokens": 603075432.0,
"reward": 0.9913701772689819,
"reward_std": 0.07224039733409882,
"rewards/accuracy_reward": 0.52255859375,
"rewards/brier_reward": 0.7876186609268189,
"rewards/confidence_uniqueness_reward": 0.9528305530548096,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.004688137117773295,
"rewards/frontier_coverage_1": 0.13433505594730377,
"rewards/frontier_coverage_10": 0.13162233978509902,
"rewards/frontier_coverage_15": 0.11250192821025848,
"rewards/frontier_coverage_20": 0.09179114252328872,
"rewards/frontier_coverage_25": 0.0720499463379383,
"rewards/frontier_coverage_5": 0.1340462476015091,
"rewards/frontier_ece_reward": 0.005268103256821632,
"signal/accuracy_reward/centered_abs_mean": 0.082318115234375,
"signal/accuracy_reward/group_std_mean": 0.11643143147230148,
"signal/accuracy_reward/group_zero_std_frac": 0.634375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0411590576171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0411590576171875,
"signal/advantage_abs_mean": 0.053145038336515425,
"signal/advantage_pre_scale_abs_mean": 0.053145038336515425,
"signal/advantage_pre_scale_std": 0.0985955536365509,
"signal/advantage_std": 0.0985955536365509,
"signal/brier_reward/centered_abs_mean": 0.12527994215488433,
"signal/brier_reward/group_std_mean": 0.16230980157852173,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01565999276936054,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01565999276936054,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021803252398967743,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0284559216350317,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002725406549870968,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002725406549870968,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004217976331710815,
"signal/frontier_aurc_reward/group_std_mean": 0.007248710375279188,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.550177397206426e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.550177397206426e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15368797779083251,
"signal/frontier_coverage_1/group_std_mean": 0.2016217142343521,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027510148007422685,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027510148007422685,
"signal/frontier_coverage_10/centered_abs_mean": 0.15032892525196076,
"signal/frontier_coverage_10/group_std_mean": 0.19736047983169555,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00269088763743639,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00269088763743639,
"signal/frontier_coverage_15/centered_abs_mean": 0.13276640176773072,
"signal/frontier_coverage_15/group_std_mean": 0.17462487518787384,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023765185847878455,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023765185847878455,
"signal/frontier_coverage_20/centered_abs_mean": 0.11213624775409699,
"signal/frontier_coverage_20/group_std_mean": 0.1470758020877838,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002007238776423037,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002007238776423037,
"signal/frontier_coverage_25/centered_abs_mean": 0.08577371686697007,
"signal/frontier_coverage_25/group_std_mean": 0.11143407225608826,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015353495255112648,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015353495255112648,
"signal/frontier_coverage_5/centered_abs_mean": 0.153548663854599,
"signal/frontier_coverage_5/group_std_mean": 0.2014426976442337,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002748521091416478,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002748521091416478,
"signal/frontier_ece_reward/centered_abs_mean": 0.006935225892812013,
"signal/frontier_ece_reward/group_std_mean": 0.008793661557137965,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008669032366015017,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008669032366015017,
"step": 180
},
{
"calibration/aurc": 0.29565313432436074,
"calibration/batch_distribution_entropy": 0.9519147171667937,
"calibration/buffer_distribution_entropy": 0.9669101539842341,
"calibration/confidence_entropy": 0.438182712066848,
"calibration/coverage@0%": 0.022670774217221135,
"calibration/coverage@1%": 0.022670774217221135,
"calibration/coverage@10%": 0.18370841487279843,
"calibration/coverage@15%": 0.3006405944227006,
"calibration/coverage@20%": 0.4382116866438356,
"calibration/coverage@25%": 0.5226226149706458,
"calibration/coverage@30%": 0.616796875,
"calibration/coverage@5%": 0.1020333904109589,
"calibration/ece": 0.13677111743117693,
"calibration/mean_confidence": 0.4978104322263574,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 489.0,
"completions/max_terminated_length": 489.0,
"completions/mean_length": 188.58310546875,
"completions/mean_terminated_length": 188.60177001953124,
"completions/min_length": 74.0,
"completions/min_terminated_length": 93.4,
"epoch": 0.592,
"grad_norm": 0.0009303243714384735,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 620174235.0,
"reward": 0.9929086327552795,
"reward_std": 0.0719732478260994,
"rewards/accuracy_reward": 0.5203125,
"rewards/brier_reward": 0.7961714744567872,
"rewards/confidence_uniqueness_reward": 0.9497325897216797,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0038590751588344573,
"rewards/frontier_coverage_1": 0.14824921488761902,
"rewards/frontier_coverage_10": 0.14601882994174958,
"rewards/frontier_coverage_15": 0.1366260230541229,
"rewards/frontier_coverage_20": 0.112667715549469,
"rewards/frontier_coverage_25": 0.08532776832580566,
"rewards/frontier_coverage_5": 0.1483199715614319,
"rewards/frontier_ece_reward": 0.0057619762606918815,
"signal/accuracy_reward/centered_abs_mean": 0.09581298828125,
"signal/accuracy_reward/group_std_mean": 0.12752275317907333,
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047906494140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.047906494140625,
"signal/advantage_abs_mean": 0.05412104800343513,
"signal/advantage_pre_scale_abs_mean": 0.05412104800343513,
"signal/advantage_pre_scale_std": 0.09765883535146713,
"signal/advantage_std": 0.09765883535146713,
"signal/brier_reward/centered_abs_mean": 0.1217377707362175,
"signal/brier_reward/group_std_mean": 0.15788544118404388,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015217221342027187,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015217221342027187,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024076349288225173,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030972678586840628,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030095436610281467,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030095436610281467,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003197602741420269,
"signal/frontier_aurc_reward/group_std_mean": 0.005368777271360159,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.723708600271493e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.723708600271493e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17332786023616792,
"signal/frontier_coverage_1/group_std_mean": 0.22274211347103118,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00310256858356297,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00310256858356297,
"signal/frontier_coverage_10/centered_abs_mean": 0.16972556412220002,
"signal/frontier_coverage_10/group_std_mean": 0.21819985806941986,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003038087533786893,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003038087533786893,
"signal/frontier_coverage_15/centered_abs_mean": 0.15815592408180237,
"signal/frontier_coverage_15/group_std_mean": 0.2036920189857483,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002830990916118026,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002830990916118026,
"signal/frontier_coverage_20/centered_abs_mean": 0.13099839091300963,
"signal/frontier_coverage_20/group_std_mean": 0.16890535950660707,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002344871172681451,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002344871172681451,
"signal/frontier_coverage_25/centered_abs_mean": 0.0981606438755989,
"signal/frontier_coverage_25/group_std_mean": 0.12592942267656326,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017570754047483207,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017570754047483207,
"signal/frontier_coverage_5/centered_abs_mean": 0.1729302763938904,
"signal/frontier_coverage_5/group_std_mean": 0.2222334235906601,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030954517889767883,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030954517889767883,
"signal/frontier_ece_reward/centered_abs_mean": 0.006981126964092255,
"signal/frontier_ece_reward/group_std_mean": 0.008749399241060019,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008726408705115318,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008726408705115318,
"step": 185
},
{
"calibration/aurc": 0.21553234752408906,
"calibration/batch_distribution_entropy": 0.9258052442161204,
"calibration/buffer_distribution_entropy": 0.9670883188013116,
"calibration/confidence_entropy": 0.4191689447590557,
"calibration/coverage@0%": 0.052734375,
"calibration/coverage@1%": 0.089453125,
"calibration/coverage@10%": 0.332421875,
"calibration/coverage@15%": 0.42734375,
"calibration/coverage@20%": 0.498046875,
"calibration/coverage@25%": 0.6125,
"calibration/coverage@30%": 0.708984375,
"calibration/coverage@5%": 0.23828125,
"calibration/ece": 0.10801203653413369,
"calibration/mean_confidence": 0.4664215572158662,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 544.0,
"completions/max_terminated_length": 544.0,
"completions/mean_length": 190.70390625,
"completions/mean_terminated_length": 190.74066467285155,
"completions/min_length": 54.8,
"completions/min_terminated_length": 92.8,
"epoch": 0.608,
"grad_norm": 0.0007160541717894375,
"learning_rate": 1e-06,
"loss": -0.0,
"num_tokens": 637126531.0,
"reward": 0.9983951568603515,
"reward_std": 0.06347624734044074,
"rewards/accuracy_reward": 0.52333984375,
"rewards/brier_reward": 0.8208484888076782,
"rewards/confidence_uniqueness_reward": 0.9440263032913208,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0034591706935316324,
"rewards/frontier_coverage_1": 0.18073319643735886,
"rewards/frontier_coverage_10": 0.16723452508449554,
"rewards/frontier_coverage_15": 0.14739094227552413,
"rewards/frontier_coverage_20": 0.11896448433399201,
"rewards/frontier_coverage_25": 0.08385140150785446,
"rewards/frontier_coverage_5": 0.17713448405265808,
"rewards/frontier_ece_reward": 0.004859468247741461,
"signal/accuracy_reward/centered_abs_mean": 0.088348388671875,
"signal/accuracy_reward/group_std_mean": 0.11715475767850876,
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0441741943359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0441741943359375,
"signal/advantage_abs_mean": 0.04768226221203804,
"signal/advantage_pre_scale_abs_mean": 0.04768226221203804,
"signal/advantage_pre_scale_std": 0.08954823464155197,
"signal/advantage_std": 0.08954823464155197,
"signal/brier_reward/centered_abs_mean": 0.11456647962331772,
"signal/brier_reward/group_std_mean": 0.1489032119512558,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014320809952914714,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014320809952914714,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027299407124519347,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03458471596240997,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034124258905649184,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034124258905649184,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002890928415581584,
"signal/frontier_aurc_reward/group_std_mean": 0.004889083281159401,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.174761536181904e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.174761536181904e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1733110725879669,
"signal/frontier_coverage_1/group_std_mean": 0.2228671282529831,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003102268138900399,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003102268138900399,
"signal/frontier_coverage_10/centered_abs_mean": 0.16030363142490386,
"signal/frontier_coverage_10/group_std_mean": 0.2063254475593567,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028694348875433207,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028694348875433207,
"signal/frontier_coverage_15/centered_abs_mean": 0.142013718187809,
"signal/frontier_coverage_15/group_std_mean": 0.18288592696189881,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025420454796403645,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025420454796403645,
"signal/frontier_coverage_20/centered_abs_mean": 0.11396982818841934,
"signal/frontier_coverage_20/group_std_mean": 0.1466364860534668,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020400599110871553,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020400599110871553,
"signal/frontier_coverage_25/centered_abs_mean": 0.07192447036504745,
"signal/frontier_coverage_25/group_std_mean": 0.09156043976545333,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012874479871243238,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012874479871243238,
"signal/frontier_coverage_5/centered_abs_mean": 0.1697360724210739,
"signal/frontier_coverage_5/group_std_mean": 0.21828807294368743,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003038275660946965,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003038275660946965,
"signal/frontier_ece_reward/centered_abs_mean": 0.005411684419959784,
"signal/frontier_ece_reward/group_std_mean": 0.007156631723046303,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000676460552494973,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000676460552494973,
"step": 190
},
{
"calibration/aurc": 0.25029732013112604,
"calibration/batch_distribution_entropy": 0.9539323020464103,
"calibration/buffer_distribution_entropy": 0.9637877765447789,
"calibration/confidence_entropy": 0.44635937342937215,
"calibration/coverage@0%": 0.015234375,
"calibration/coverage@1%": 0.015234375,
"calibration/coverage@10%": 0.193359375,
"calibration/coverage@15%": 0.32890625,
"calibration/coverage@20%": 0.440625,
"calibration/coverage@25%": 0.534375,
"calibration/coverage@30%": 0.620703125,
"calibration/coverage@5%": 0.0828125,
"calibration/ece": 0.11607683302811309,
"calibration/mean_confidence": 0.4840353309144009,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 501.0,
"completions/max_terminated_length": 501.0,
"completions/mean_length": 194.76396484375,
"completions/mean_terminated_length": 194.78286743164062,
"completions/min_length": 74.0,
"completions/min_terminated_length": 93.0,
"epoch": 0.624,
"grad_norm": 0.001018967479467392,
"learning_rate": 1e-06,
"loss": -0.0003,
"num_tokens": 654464818.0,
"reward": 0.9963904023170471,
"reward_std": 0.075007364153862,
"rewards/accuracy_reward": 0.5236328125,
"rewards/brier_reward": 0.8108787059783935,
"rewards/confidence_uniqueness_reward": 0.9516486644744873,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.00426640659570694,
"rewards/frontier_coverage_1": 0.16479457467794417,
"rewards/frontier_coverage_10": 0.14903722554445267,
"rewards/frontier_coverage_15": 0.12789833545684814,
"rewards/frontier_coverage_20": 0.09996737092733383,
"rewards/frontier_coverage_25": 0.07884926050901413,
"rewards/frontier_coverage_5": 0.16016590297222139,
"rewards/frontier_ece_reward": 0.0032679932191967964,
"signal/accuracy_reward/centered_abs_mean": 0.1026611328125,
"signal/accuracy_reward/group_std_mean": 0.1322247326374054,
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05133056640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05133056640625,
"signal/advantage_abs_mean": 0.05846463441848755,
"signal/advantage_pre_scale_abs_mean": 0.05846463441848755,
"signal/advantage_pre_scale_std": 0.1034909039735794,
"signal/advantage_std": 0.1034909039735794,
"signal/brier_reward/centered_abs_mean": 0.12926639318466188,
"signal/brier_reward/group_std_mean": 0.16571001708507538,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016158299148082735,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.016158299148082735,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02218361124396324,
"signal/confidence_uniqueness_reward/group_std_mean": 0.028404848650097847,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002772951405495405,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002772951405495405,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0035941647365689278,
"signal/frontier_aurc_reward/group_std_mean": 0.005814951099455357,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.433554735849612e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.433554735849612e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1797071576118469,
"signal/frontier_coverage_1/group_std_mean": 0.232409331202507,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003216758044436574,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003216758044436574,
"signal/frontier_coverage_10/centered_abs_mean": 0.15397950112819672,
"signal/frontier_coverage_10/group_std_mean": 0.19958638846874238,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002756233001127839,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002756233001127839,
"signal/frontier_coverage_15/centered_abs_mean": 0.12312380224466324,
"signal/frontier_coverage_15/group_std_mean": 0.15969986021518706,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002203916013240814,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002203916013240814,
"signal/frontier_coverage_20/centered_abs_mean": 0.08721509128808975,
"signal/frontier_coverage_20/group_std_mean": 0.11252322345972061,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015611500246450305,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015611500246450305,
"signal/frontier_coverage_25/centered_abs_mean": 0.06863628327846527,
"signal/frontier_coverage_25/group_std_mean": 0.08759003430604935,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001228589448146522,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001228589448146522,
"signal/frontier_coverage_5/centered_abs_mean": 0.17220644056797027,
"signal/frontier_coverage_5/group_std_mean": 0.22277928292751312,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030824951361864807,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030824951361864807,
"signal/frontier_ece_reward/centered_abs_mean": 0.005346813146024943,
"signal/frontier_ece_reward/group_std_mean": 0.006953202188014984,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006683516432531178,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006683516432531178,
"step": 195
},
{
"calibration/aurc": 0.2574396932872196,
"calibration/batch_distribution_entropy": 0.9476952756873953,
"calibration/buffer_distribution_entropy": 0.9622491335756894,
"calibration/confidence_entropy": 0.45384763235188447,
"calibration/coverage@0%": 0.051953125,
"calibration/coverage@1%": 0.051953125,
"calibration/coverage@10%": 0.311328125,
"calibration/coverage@15%": 0.348046875,
"calibration/coverage@20%": 0.38046875,
"calibration/coverage@25%": 0.44296875,
"calibration/coverage@30%": 0.6203125,
"calibration/coverage@5%": 0.212890625,
"calibration/ece": 0.17372297164873932,
"calibration/mean_confidence": 0.5381806231031552,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0009765625,
"completions/max_length": 495.0,
"completions/max_terminated_length": 495.0,
"completions/mean_length": 197.94150390625,
"completions/mean_terminated_length": 198.13479614257812,
"completions/min_length": 37.4,
"completions/min_terminated_length": 96.2,
"epoch": 0.64,
"grad_norm": 0.0009096023277379572,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 671834427.0,
"reward": 1.0166439652442931,
"reward_std": 0.0668403185904026,
"rewards/accuracy_reward": 0.572265625,
"rewards/brier_reward": 0.8049077749252319,
"rewards/confidence_uniqueness_reward": 0.9526163697242737,
"rewards/format_reward": 0.9990234375,
"rewards/frontier_aurc_reward": -0.004572666762396693,
"rewards/frontier_coverage_1": 0.11914323419332504,
"rewards/frontier_coverage_10": 0.11158685833215713,
"rewards/frontier_coverage_15": 0.09276209697127343,
"rewards/frontier_coverage_20": 0.0855812445282936,
"rewards/frontier_coverage_25": 0.09080441743135452,
"rewards/frontier_coverage_5": 0.11887658834457397,
"rewards/frontier_ece_reward": 0.002520751999691129,
"signal/accuracy_reward/centered_abs_mean": 0.078125,
"signal/accuracy_reward/group_std_mean": 0.10895988643169403,
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0390625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0390625,
"signal/advantage_abs_mean": 0.04865444153547287,
"signal/advantage_pre_scale_abs_mean": 0.04865444153547287,
"signal/advantage_pre_scale_std": 0.09524674415588379,
"signal/advantage_std": 0.09524674415588379,
"signal/brier_reward/centered_abs_mean": 0.11449733972549439,
"signal/brier_reward/group_std_mean": 0.15064277946949006,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014312167465686799,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014312167465686799,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02249297872185707,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02909063771367073,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002811622340232134,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002811622340232134,
"signal/format_reward/centered_abs_mean": 0.0016357421875,
"signal/format_reward/group_std_mean": 0.0029698234982788565,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00081787109375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00081787109375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004220958100631833,
"signal/frontier_aurc_reward/group_std_mean": 0.007582212705165148,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.555514748673886e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.555514748673886e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14690053462982178,
"signal/frontier_coverage_1/group_std_mean": 0.19440690875053407,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002629519393667579,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002629519393667579,
"signal/frontier_coverage_10/centered_abs_mean": 0.1303493395447731,
"signal/frontier_coverage_10/group_std_mean": 0.1721312552690506,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023332530166953803,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023332530166953803,
"signal/frontier_coverage_15/centered_abs_mean": 0.10296084582805634,
"signal/frontier_coverage_15/group_std_mean": 0.13540229350328445,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018429991323500872,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018429991323500872,
"signal/frontier_coverage_20/centered_abs_mean": 0.07925621718168259,
"signal/frontier_coverage_20/group_std_mean": 0.10276458263397217,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001418686262331903,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001418686262331903,
"signal/frontier_coverage_25/centered_abs_mean": 0.07305071949958801,
"signal/frontier_coverage_25/group_std_mean": 0.09430216252803802,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013076077681034803,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013076077681034803,
"signal/frontier_coverage_5/centered_abs_mean": 0.1449471652507782,
"signal/frontier_coverage_5/group_std_mean": 0.19173803627490998,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025945540983229876,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025945540983229876,
"signal/frontier_ece_reward/centered_abs_mean": 0.005757212173193693,
"signal/frontier_ece_reward/group_std_mean": 0.007660919800400734,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007196515216492116,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007196515216492116,
"step": 200
},
{
"epoch": 0.64,
"eval_calibration/aurc": 0.493511159818044,
"eval_calibration/batch_distribution_entropy": 0.8832849886693642,
"eval_calibration/buffer_distribution_entropy": 0.9693483416656018,
"eval_calibration/confidence_entropy": 0.44068152354587353,
"eval_calibration/coverage@0%": 0.0234375,
"eval_calibration/coverage@1%": 0.0234375,
"eval_calibration/coverage@10%": 0.0234375,
"eval_calibration/coverage@15%": 0.0234375,
"eval_calibration/coverage@20%": 0.0234375,
"eval_calibration/coverage@25%": 0.15246975806451613,
"eval_calibration/coverage@30%": 0.2777217741935484,
"eval_calibration/coverage@5%": 0.0234375,
"eval_calibration/ece": 0.1927923387096774,
"eval_calibration/mean_confidence": 0.4916633064516129,
"eval_completions/clipped_ratio": 0.001953125,
"eval_completions/max_length": 456.25,
"eval_completions/max_terminated_length": 456.25,
"eval_completions/mean_length": 199.99340057373047,
"eval_completions/mean_terminated_length": 200.38811492919922,
"eval_completions/min_length": 83.75,
"eval_completions/min_terminated_length": 111.0,
"eval_loss": 0.0,
"eval_num_tokens": 671834427.0,
"eval_reward": 0.9421386420726776,
"eval_reward_std": 0.2367052398622036,
"eval_rewards/accuracy_reward": 0.431640625,
"eval_rewards/brier_reward": 0.7890757769346237,
"eval_rewards/confidence_uniqueness_reward": 0.894862025976181,
"eval_rewards/format_reward": 0.998046875,
"eval_rewards/frontier_aurc_reward": -0.006231023231521249,
"eval_rewards/frontier_coverage_1": 0.2020808570086956,
"eval_rewards/frontier_coverage_10": 0.19078554958105087,
"eval_rewards/frontier_coverage_15": 0.15176815912127495,
"eval_rewards/frontier_coverage_20": 0.1030439492315054,
"eval_rewards/frontier_coverage_25": 0.06260449066758156,
"eval_rewards/frontier_coverage_5": 0.2010589875280857,
"eval_rewards/frontier_ece_reward": 0.004809546982869506,
"eval_runtime": 29.2937,
"eval_samples_per_second": 17.069,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4708251953125,
"eval_signal/accuracy_reward/group_std_mean": 0.49239178001880646,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23541259765625,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23541259765625,
"eval_signal/advantage_abs_mean": 0.21742305904626846,
"eval_signal/advantage_pre_scale_abs_mean": 0.21742305904626846,
"eval_signal/advantage_pre_scale_std": 0.23469773307442665,
"eval_signal/advantage_std": 0.23469773307442665,
"eval_signal/brier_reward/centered_abs_mean": 0.2195335328578949,
"eval_signal/brier_reward/group_std_mean": 0.2732725068926811,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027441691607236862,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.027441691607236862,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04748190473765135,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05865086428821087,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0059352380922064185,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0059352380922064185,
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.007879440323449671,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.014890573918819427,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0001410419754392933,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0001410419754392933,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3686321824789047,
"eval_signal/frontier_coverage_1/group_std_mean": 0.45531778782606125,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0065985157852992415,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0065985157852992415,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3471181392669678,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4296090304851532,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00621341448277235,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00621341448277235,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.27391282469034195,
"eval_signal/frontier_coverage_15/group_std_mean": 0.342199482023716,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004903039196506143,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004903039196506143,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.18069317936897278,
"eval_signal/frontier_coverage_20/group_std_mean": 0.22858576849102974,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003234407864511013,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003234407864511013,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.12702187150716782,
"eval_signal/frontier_coverage_25/group_std_mean": 0.16410764679312706,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002273691410664469,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002273691410664469,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3674790486693382,
"eval_signal/frontier_coverage_5/group_std_mean": 0.45392612367868423,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006577874883078039,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006577874883078039,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.009357131319120526,
"eval_signal/frontier_ece_reward/group_std_mean": 0.013854497345164418,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011696414148900658,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011696414148900658,
"eval_steps_per_second": 0.137,
"step": 200
},
{
"calibration/aurc": 0.41946149037477376,
"calibration/batch_distribution_entropy": 0.9577894026302515,
"calibration/buffer_distribution_entropy": 0.9715740737633431,
"calibration/confidence_entropy": 0.47288876571780036,
"calibration/coverage@0%": 0.005470281862745098,
"calibration/coverage@1%": 0.005470281862745098,
"calibration/coverage@10%": 0.005470281862745098,
"calibration/coverage@15%": 0.030860906862745098,
"calibration/coverage@20%": 0.060939031862745095,
"calibration/coverage@25%": 0.1355484068627451,
"calibration/coverage@30%": 0.2285171568627451,
"calibration/coverage@5%": 0.005470281862745098,
"calibration/ece": 0.14839613280099095,
"calibration/mean_confidence": 0.5153986525908454,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 494.8,
"completions/max_terminated_length": 494.8,
"completions/mean_length": 196.7990234375,
"completions/mean_terminated_length": 196.87784729003906,
"completions/min_length": 57.8,
"completions/min_terminated_length": 96.0,
"epoch": 0.656,
"grad_norm": 0.0009435078827664256,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 688706193.0,
"reward": 0.9806293368339538,
"reward_std": 0.08091690391302109,
"rewards/accuracy_reward": 0.5013671875,
"rewards/brier_reward": 0.7843360900878906,
"rewards/confidence_uniqueness_reward": 0.9562799215316773,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.005319964047521353,
"rewards/frontier_coverage_1": 0.1383114606142044,
"rewards/frontier_coverage_10": 0.131970576941967,
"rewards/frontier_coverage_15": 0.11439355909824371,
"rewards/frontier_coverage_20": 0.08972573131322861,
"rewards/frontier_coverage_25": 0.06971659734845162,
"rewards/frontier_coverage_5": 0.13804129958152772,
"rewards/frontier_ece_reward": 0.003979566600173711,
"signal/accuracy_reward/centered_abs_mean": 0.1010498046875,
"signal/accuracy_reward/group_std_mean": 0.13649359196424485,
"signal/accuracy_reward/group_zero_std_frac": 0.596875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05052490234375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05052490234375,
"signal/advantage_abs_mean": 0.06074377223849296,
"signal/advantage_pre_scale_abs_mean": 0.06074377223849296,
"signal/advantage_pre_scale_std": 0.10985483229160309,
"signal/advantage_std": 0.10985483229160309,
"signal/brier_reward/centered_abs_mean": 0.12995689660310744,
"signal/brier_reward/group_std_mean": 0.165918031334877,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01624461207538843,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01624461207538843,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019338706508278847,
"signal/confidence_uniqueness_reward/group_std_mean": 0.025475112721323967,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002417338313534856,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002417338313534856,
"signal/format_reward/centered_abs_mean": 0.000933837890625,
"signal/format_reward/group_std_mean": 0.0024258273653686045,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004763511009514332,
"signal/frontier_aurc_reward/group_std_mean": 0.008127985801547766,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.526684250682592e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.526684250682592e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1602654814720154,
"signal/frontier_coverage_1/group_std_mean": 0.20846983194351196,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028687520418316124,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028687520418316124,
"signal/frontier_coverage_10/centered_abs_mean": 0.15199364870786666,
"signal/frontier_coverage_10/group_std_mean": 0.19783683717250825,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002720686187967658,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002720686187967658,
"signal/frontier_coverage_15/centered_abs_mean": 0.12867532670497894,
"signal/frontier_coverage_15/group_std_mean": 0.16779664158821106,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002303288271650672,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002303288271650672,
"signal/frontier_coverage_20/centered_abs_mean": 0.09723804891109467,
"signal/frontier_coverage_20/group_std_mean": 0.1266666144132614,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017405609833076596,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017405609833076596,
"signal/frontier_coverage_25/centered_abs_mean": 0.07228973060846329,
"signal/frontier_coverage_25/group_std_mean": 0.09323944300413131,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012939860811457039,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012939860811457039,
"signal/frontier_coverage_5/centered_abs_mean": 0.15987294018268586,
"signal/frontier_coverage_5/group_std_mean": 0.20797096490859984,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00286172553896904,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00286172553896904,
"signal/frontier_ece_reward/centered_abs_mean": 0.006007364951074123,
"signal/frontier_ece_reward/group_std_mean": 0.007822115626186132,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007509206188842654,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007509206188842654,
"step": 205
},
{
"calibration/aurc": 0.2926994548714511,
"calibration/batch_distribution_entropy": 0.9444086203632773,
"calibration/buffer_distribution_entropy": 0.9737498655958629,
"calibration/confidence_entropy": 0.44393618787575706,
"calibration/coverage@0%": 0.013671875,
"calibration/coverage@1%": 0.013671875,
"calibration/coverage@10%": 0.10234375,
"calibration/coverage@15%": 0.19301002935420744,
"calibration/coverage@20%": 0.30673770792563604,
"calibration/coverage@25%": 0.38919092465753424,
"calibration/coverage@30%": 0.5204722663894324,
"calibration/coverage@5%": 0.075,
"calibration/ece": 0.15594339598631118,
"calibration/mean_confidence": 0.539981359215427,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 499.6,
"completions/max_terminated_length": 499.6,
"completions/mean_length": 195.71875,
"completions/mean_terminated_length": 195.81506958007813,
"completions/min_length": 18.2,
"completions/min_terminated_length": 92.8,
"epoch": 0.672,
"grad_norm": 0.0009184509981423616,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 705623793.0,
"reward": 0.9914369463920594,
"reward_std": 0.07370885983109474,
"rewards/accuracy_reward": 0.5158203125,
"rewards/brier_reward": 0.7974509596824646,
"rewards/confidence_uniqueness_reward": 0.9497063755989075,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.00501512442715466,
"rewards/frontier_coverage_1": 0.15677720606327056,
"rewards/frontier_coverage_10": 0.1533482253551483,
"rewards/frontier_coverage_15": 0.1392940640449524,
"rewards/frontier_coverage_20": 0.12043006420135498,
"rewards/frontier_coverage_25": 0.10223577320575714,
"rewards/frontier_coverage_5": 0.1563895434141159,
"rewards/frontier_ece_reward": 0.005090564861893654,
"signal/accuracy_reward/centered_abs_mean": 0.0990234375,
"signal/accuracy_reward/group_std_mean": 0.12875483930110931,
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04951171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04951171875,
"signal/advantage_abs_mean": 0.05632427111268044,
"signal/advantage_pre_scale_abs_mean": 0.05632427111268044,
"signal/advantage_pre_scale_std": 0.10220988094806671,
"signal/advantage_std": 0.10220988094806671,
"signal/brier_reward/centered_abs_mean": 0.12671652734279631,
"signal/brier_reward/group_std_mean": 0.16150912046432495,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01583956591784954,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01583956591784954,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024024555832147597,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03174975365400314,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030030694790184496,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030030694790184496,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004770603589713573,
"signal/frontier_aurc_reward/group_std_mean": 0.008204363659024239,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.539380214642734e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.539380214642734e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16620306968688964,
"signal/frontier_coverage_1/group_std_mean": 0.21228016614913942,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029750348068773745,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029750348068773745,
"signal/frontier_coverage_10/centered_abs_mean": 0.15870766639709472,
"signal/frontier_coverage_10/group_std_mean": 0.2031603991985321,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028408670797944067,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028408670797944067,
"signal/frontier_coverage_15/centered_abs_mean": 0.143101367354393,
"signal/frontier_coverage_15/group_std_mean": 0.18369907140731812,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025615144055336713,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025615144055336713,
"signal/frontier_coverage_20/centered_abs_mean": 0.11705817133188248,
"signal/frontier_coverage_20/group_std_mean": 0.1504491925239563,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002095341356471181,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002095341356471181,
"signal/frontier_coverage_25/centered_abs_mean": 0.09472694396972656,
"signal/frontier_coverage_25/group_std_mean": 0.12149419337511062,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016956122126430273,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016956122126430273,
"signal/frontier_coverage_5/centered_abs_mean": 0.16547443866729736,
"signal/frontier_coverage_5/group_std_mean": 0.21141978204250336,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002961992286145687,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002961992286145687,
"signal/frontier_ece_reward/centered_abs_mean": 0.006938015948981047,
"signal/frontier_ece_reward/group_std_mean": 0.008949788101017476,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008672519936226309,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008672519936226309,
"step": 210
},
{
"calibration/aurc": 0.331979046438107,
"calibration/batch_distribution_entropy": 0.9499702685412638,
"calibration/buffer_distribution_entropy": 0.9659590854530025,
"calibration/confidence_entropy": 0.4506932877065063,
"calibration/coverage@0%": 0.009392581947162425,
"calibration/coverage@1%": 0.009392581947162425,
"calibration/coverage@10%": 0.08327421722113502,
"calibration/coverage@15%": 0.1763132950097847,
"calibration/coverage@20%": 0.37963246086105673,
"calibration/coverage@25%": 0.4668335677592955,
"calibration/coverage@30%": 0.5540140349804306,
"calibration/coverage@5%": 0.009392581947162425,
"calibration/ece": 0.14634282791695438,
"calibration/mean_confidence": 0.5238675511923947,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 575.4,
"completions/max_terminated_length": 575.4,
"completions/mean_length": 203.140234375,
"completions/mean_terminated_length": 203.19979248046874,
"completions/min_length": 39.6,
"completions/min_terminated_length": 95.8,
"epoch": 0.688,
"grad_norm": 0.0009329606546089053,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 722657869.0,
"reward": 1.0020609378814698,
"reward_std": 0.07278908342123032,
"rewards/accuracy_reward": 0.54384765625,
"rewards/brier_reward": 0.7905821681022644,
"rewards/confidence_uniqueness_reward": 0.9534401655197143,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.004365035938099027,
"rewards/frontier_coverage_1": 0.12487183883786201,
"rewards/frontier_coverage_10": 0.12341351956129074,
"rewards/frontier_coverage_15": 0.11195187419652938,
"rewards/frontier_coverage_20": 0.09282574728131295,
"rewards/frontier_coverage_25": 0.07641910165548324,
"rewards/frontier_coverage_5": 0.12487183883786201,
"rewards/frontier_ece_reward": 0.005168016534298658,
"signal/accuracy_reward/centered_abs_mean": 0.097796630859375,
"signal/accuracy_reward/group_std_mean": 0.1325370654463768,
"signal/accuracy_reward/group_zero_std_frac": 0.603125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0488983154296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0488983154296875,
"signal/advantage_abs_mean": 0.05411761626601219,
"signal/advantage_pre_scale_abs_mean": 0.05411761626601219,
"signal/advantage_pre_scale_std": 0.10028181821107865,
"signal/advantage_std": 0.10028181821107865,
"signal/brier_reward/centered_abs_mean": 0.12061165422201156,
"signal/brier_reward/group_std_mean": 0.15503303110599517,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015076456777751445,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015076456777751445,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021210041642189027,
"signal/confidence_uniqueness_reward/group_std_mean": 0.027603601291775703,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026512552052736283,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026512552052736283,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003994943108409643,
"signal/frontier_aurc_reward/group_std_mean": 0.0068402208853513,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.150947931222618e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.150947931222618e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1628277599811554,
"signal/frontier_coverage_1/group_std_mean": 0.2098786264657974,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002914616884663701,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002914616884663701,
"signal/frontier_coverage_10/centered_abs_mean": 0.1613152176141739,
"signal/frontier_coverage_10/group_std_mean": 0.2077154040336609,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002887542266398668,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002887542266398668,
"signal/frontier_coverage_15/centered_abs_mean": 0.14416129291057586,
"signal/frontier_coverage_15/group_std_mean": 0.1860842227935791,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002580487076193094,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002580487076193094,
"signal/frontier_coverage_20/centered_abs_mean": 0.10915819257497787,
"signal/frontier_coverage_20/group_std_mean": 0.14186433851718902,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001953931665048003,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001953931665048003,
"signal/frontier_coverage_25/centered_abs_mean": 0.0854944184422493,
"signal/frontier_coverage_25/group_std_mean": 0.11132525205612183,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015303500229492783,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015303500229492783,
"signal/frontier_coverage_5/centered_abs_mean": 0.1628277599811554,
"signal/frontier_coverage_5/group_std_mean": 0.2098786264657974,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002914616884663701,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002914616884663701,
"signal/frontier_ece_reward/centered_abs_mean": 0.007584027945995331,
"signal/frontier_ece_reward/group_std_mean": 0.009723598696291446,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009480034932494164,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009480034932494164,
"step": 215
},
{
"calibration/aurc": 0.2608545612019092,
"calibration/batch_distribution_entropy": 0.9275631931023062,
"calibration/buffer_distribution_entropy": 0.9580475700263857,
"calibration/confidence_entropy": 0.42389808546660135,
"calibration/coverage@0%": 0.003908543297455968,
"calibration/coverage@1%": 0.003908543297455968,
"calibration/coverage@10%": 0.09900241560665361,
"calibration/coverage@15%": 0.15652519569471623,
"calibration/coverage@20%": 0.28116514799412917,
"calibration/coverage@25%": 0.5425230858610568,
"calibration/coverage@30%": 0.653099009295499,
"calibration/coverage@5%": 0.01093979329745597,
"calibration/ece": 0.10845385821198668,
"calibration/mean_confidence": 0.5364261070313574,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 609.4,
"completions/max_terminated_length": 609.4,
"completions/mean_length": 203.096484375,
"completions/mean_terminated_length": 203.17578125,
"completions/min_length": 60.2,
"completions/min_terminated_length": 94.2,
"epoch": 0.704,
"grad_norm": 0.0007699209963902831,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 739603721.0,
"reward": 1.0045687437057496,
"reward_std": 0.0682177111506462,
"rewards/accuracy_reward": 0.5419921875,
"rewards/brier_reward": 0.8065258026123047,
"rewards/confidence_uniqueness_reward": 0.9545469999313354,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.00427693435922265,
"rewards/frontier_coverage_1": 0.14027815759181977,
"rewards/frontier_coverage_10": 0.13724253922700883,
"rewards/frontier_coverage_15": 0.12166428565979004,
"rewards/frontier_coverage_20": 0.10210901647806167,
"rewards/frontier_coverage_25": 0.08873845413327217,
"rewards/frontier_coverage_5": 0.14027815759181977,
"rewards/frontier_ece_reward": 0.00510264802724123,
"signal/accuracy_reward/centered_abs_mean": 0.0825927734375,
"signal/accuracy_reward/group_std_mean": 0.11197452545166016,
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04129638671875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04129638671875,
"signal/advantage_abs_mean": 0.050767023116350174,
"signal/advantage_pre_scale_abs_mean": 0.050767023116350174,
"signal/advantage_pre_scale_std": 0.09501631557941437,
"signal/advantage_std": 0.09501631557941437,
"signal/brier_reward/centered_abs_mean": 0.12148282825946807,
"signal/brier_reward/group_std_mean": 0.15641909837722778,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01518535353243351,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01518535353243351,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019867252558469772,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02657301612198353,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024834065698087215,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024834065698087215,
"signal/format_reward/centered_abs_mean": 0.00074462890625,
"signal/format_reward/group_std_mean": 0.0018734002020210027,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00388403395190835,
"signal/frontier_aurc_reward/group_std_mean": 0.006640929076820612,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.952420808374882e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.952420808374882e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15835388600826264,
"signal/frontier_coverage_1/group_std_mean": 0.2032252162694931,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028345345519483088,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028345345519483088,
"signal/frontier_coverage_10/centered_abs_mean": 0.15482834577560425,
"signal/frontier_coverage_10/group_std_mean": 0.19876473546028137,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027714272029697893,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027714272029697893,
"signal/frontier_coverage_15/centered_abs_mean": 0.13561422675848006,
"signal/frontier_coverage_15/group_std_mean": 0.1744435727596283,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002427494595758617,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002427494595758617,
"signal/frontier_coverage_20/centered_abs_mean": 0.1050514042377472,
"signal/frontier_coverage_20/group_std_mean": 0.1355624422430992,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018804199760779738,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018804199760779738,
"signal/frontier_coverage_25/centered_abs_mean": 0.08059178441762924,
"signal/frontier_coverage_25/group_std_mean": 0.10396585315465927,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014425928937271237,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014425928937271237,
"signal/frontier_coverage_5/centered_abs_mean": 0.15835388600826264,
"signal/frontier_coverage_5/group_std_mean": 0.2032252162694931,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028345345519483088,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028345345519483088,
"signal/frontier_ece_reward/centered_abs_mean": 0.006038886867463589,
"signal/frontier_ece_reward/group_std_mean": 0.007732567843049765,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007548608584329486,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007548608584329486,
"step": 220
},
{
"calibration/aurc": 0.23482665586025736,
"calibration/batch_distribution_entropy": 0.9612250773623764,
"calibration/buffer_distribution_entropy": 0.9586416777876605,
"calibration/confidence_entropy": 0.44620637832833,
"calibration/coverage@0%": 0.084375,
"calibration/coverage@1%": 0.100390625,
"calibration/coverage@10%": 0.258203125,
"calibration/coverage@15%": 0.341796875,
"calibration/coverage@20%": 0.434765625,
"calibration/coverage@25%": 0.578125,
"calibration/coverage@30%": 0.66875,
"calibration/coverage@5%": 0.154296875,
"calibration/ece": 0.14590859443467447,
"calibration/mean_confidence": 0.5181223305126723,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 537.4,
"completions/max_terminated_length": 537.4,
"completions/mean_length": 202.41982421875,
"completions/mean_terminated_length": 202.45970458984374,
"completions/min_length": 77.6,
"completions/min_terminated_length": 96.0,
"epoch": 0.72,
"grad_norm": 0.0011968121398240328,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 756686356.0,
"reward": 1.0170644760131835,
"reward_std": 0.06888751536607743,
"rewards/accuracy_reward": 0.5703125,
"rewards/brier_reward": 0.8109802842140198,
"rewards/confidence_uniqueness_reward": 0.956030797958374,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0033496411517262457,
"rewards/frontier_coverage_1": 0.11873116791248321,
"rewards/frontier_coverage_10": 0.10979411900043487,
"rewards/frontier_coverage_15": 0.09770300686359405,
"rewards/frontier_coverage_20": 0.08028805404901504,
"rewards/frontier_coverage_25": 0.07443805187940597,
"rewards/frontier_coverage_5": 0.11705376356840133,
"rewards/frontier_ece_reward": 0.0038806302938610314,
"signal/accuracy_reward/centered_abs_mean": 0.09033203125,
"signal/accuracy_reward/group_std_mean": 0.1240386575460434,
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045166015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045166015625,
"signal/advantage_abs_mean": 0.05042407363653183,
"signal/advantage_pre_scale_abs_mean": 0.05042407363653183,
"signal/advantage_pre_scale_std": 0.09537018090486526,
"signal/advantage_std": 0.09537018090486526,
"signal/brier_reward/centered_abs_mean": 0.1113901287317276,
"signal/brier_reward/group_std_mean": 0.1435837119817734,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01392376609146595,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01392376609146595,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019289053231477737,
"signal/confidence_uniqueness_reward/group_std_mean": 0.024880537763237953,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002411131653934717,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002411131653934717,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028278836980462074,
"signal/frontier_aurc_reward/group_std_mean": 0.004779759328812361,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.061911360826343e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.061911360826343e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15533297061920165,
"signal/frontier_coverage_1/group_std_mean": 0.2013178825378418,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002780460100620985,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002780460100620985,
"signal/frontier_coverage_10/centered_abs_mean": 0.14142029881477355,
"signal/frontier_coverage_10/group_std_mean": 0.18364064693450927,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025314232800155876,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025314232800155876,
"signal/frontier_coverage_15/centered_abs_mean": 0.12093036472797394,
"signal/frontier_coverage_15/group_std_mean": 0.15705038607120514,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00216465350240469,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00216465350240469,
"signal/frontier_coverage_20/centered_abs_mean": 0.08552543967962264,
"signal/frontier_coverage_20/group_std_mean": 0.1104985460639,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015309053473174573,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015309053473174573,
"signal/frontier_coverage_25/centered_abs_mean": 0.060288567841053006,
"signal/frontier_coverage_25/group_std_mean": 0.07731353044509888,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010791653301566839,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010791653301566839,
"signal/frontier_coverage_5/centered_abs_mean": 0.15248898267745972,
"signal/frontier_coverage_5/group_std_mean": 0.19771113097667695,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027295527514070274,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027295527514070274,
"signal/frontier_ece_reward/centered_abs_mean": 0.005354726873338223,
"signal/frontier_ece_reward/group_std_mean": 0.006952607538551092,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006693408591672778,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006693408591672778,
"step": 225
},
{
"calibration/aurc": 0.25437434102883794,
"calibration/batch_distribution_entropy": 0.9427088800876608,
"calibration/buffer_distribution_entropy": 0.97025031179046,
"calibration/confidence_entropy": 0.44377589500656056,
"calibration/coverage@0%": 0.00859910102739726,
"calibration/coverage@1%": 0.00859910102739726,
"calibration/coverage@10%": 0.07586075097847358,
"calibration/coverage@15%": 0.2610812133072407,
"calibration/coverage@20%": 0.4486492477984344,
"calibration/coverage@25%": 0.5533749694227006,
"calibration/coverage@30%": 0.6444800330234834,
"calibration/coverage@5%": 0.04028941413894325,
"calibration/ece": 0.12570871691467667,
"calibration/mean_confidence": 0.5756968235145307,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 617.8,
"completions/max_terminated_length": 617.8,
"completions/mean_length": 203.03740234375,
"completions/mean_terminated_length": 203.11683349609376,
"completions/min_length": 40.0,
"completions/min_terminated_length": 99.2,
"epoch": 0.736,
"grad_norm": 0.0007375147542916238,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 773705043.0,
"reward": 1.0119364738464356,
"reward_std": 0.06964921355247497,
"rewards/accuracy_reward": 0.563671875,
"rewards/brier_reward": 0.801817262172699,
"rewards/confidence_uniqueness_reward": 0.9554043889045716,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.004348062258213758,
"rewards/frontier_coverage_1": 0.11800303161144257,
"rewards/frontier_coverage_10": 0.10368544608354568,
"rewards/frontier_coverage_15": 0.08785041272640229,
"rewards/frontier_coverage_20": 0.07442668229341506,
"rewards/frontier_coverage_25": 0.08015984743833542,
"rewards/frontier_coverage_5": 0.11402959078550338,
"rewards/frontier_ece_reward": 0.003366558370180428,
"signal/accuracy_reward/centered_abs_mean": 0.0880126953125,
"signal/accuracy_reward/group_std_mean": 0.11547122150659561,
"signal/accuracy_reward/group_zero_std_frac": 0.675,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04400634765625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04400634765625,
"signal/advantage_abs_mean": 0.05290033966302872,
"signal/advantage_pre_scale_abs_mean": 0.05290033966302872,
"signal/advantage_pre_scale_std": 0.10007934272289276,
"signal/advantage_std": 0.10007934272289276,
"signal/brier_reward/centered_abs_mean": 0.1161247432231903,
"signal/brier_reward/group_std_mean": 0.14959118664264678,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014515592902898788,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014515592902898788,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020079517364501955,
"signal/confidence_uniqueness_reward/group_std_mean": 0.027113460749387742,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025099396705627443,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025099396705627443,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.002762135770171881,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036679123528301715,
"signal/frontier_aurc_reward/group_std_mean": 0.006219440698623657,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.565562944160774e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.565562944160774e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14942230582237243,
"signal/frontier_coverage_1/group_std_mean": 0.1930805951356888,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002674659201875329,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002674659201875329,
"signal/frontier_coverage_10/centered_abs_mean": 0.12416762858629227,
"signal/frontier_coverage_10/group_std_mean": 0.16071577370166779,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002222600392997265,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002222600392997265,
"signal/frontier_coverage_15/centered_abs_mean": 0.09914140552282333,
"signal/frontier_coverage_15/group_std_mean": 0.1284227952361107,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017746312078088522,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017746312078088522,
"signal/frontier_coverage_20/centered_abs_mean": 0.0690557137131691,
"signal/frontier_coverage_20/group_std_mean": 0.0893009215593338,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012360972352325917,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012360972352325917,
"signal/frontier_coverage_25/centered_abs_mean": 0.061588793992996216,
"signal/frontier_coverage_25/group_std_mean": 0.07924119979143143,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011024394305422902,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011024394305422902,
"signal/frontier_coverage_5/centered_abs_mean": 0.14388204216957093,
"signal/frontier_coverage_5/group_std_mean": 0.1859131395816803,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025754883885383607,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025754883885383607,
"signal/frontier_ece_reward/centered_abs_mean": 0.005603797361254692,
"signal/frontier_ece_reward/group_std_mean": 0.00726176118478179,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007004746701568365,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007004746701568365,
"step": 230
},
{
"calibration/aurc": 0.2542425110643244,
"calibration/batch_distribution_entropy": 0.9244142332019362,
"calibration/buffer_distribution_entropy": 0.9614472860277317,
"calibration/confidence_entropy": 0.4114774239833169,
"calibration/coverage@0%": 0.02265930772994129,
"calibration/coverage@1%": 0.02265930772994129,
"calibration/coverage@10%": 0.18804962695694716,
"calibration/coverage@15%": 0.32053877201565556,
"calibration/coverage@20%": 0.43189670988258316,
"calibration/coverage@25%": 0.5123937438845401,
"calibration/coverage@30%": 0.6385946673189824,
"calibration/coverage@5%": 0.05900348581213307,
"calibration/ece": 0.12094403317232238,
"calibration/mean_confidence": 0.5156067658688501,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 550.8,
"completions/max_terminated_length": 550.8,
"completions/mean_length": 201.76005859375,
"completions/mean_terminated_length": 201.7993927001953,
"completions/min_length": 56.8,
"completions/min_terminated_length": 94.8,
"epoch": 0.752,
"grad_norm": 0.0007739876164123416,
"learning_rate": 1e-06,
"loss": -0.0,
"num_tokens": 790998266.0,
"reward": 1.0114245295524598,
"reward_std": 0.06914101243019104,
"rewards/accuracy_reward": 0.5615234375,
"rewards/brier_reward": 0.8030555367469787,
"rewards/confidence_uniqueness_reward": 0.9526637196540833,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.00500394320115447,
"rewards/frontier_coverage_1": 0.12411178350448608,
"rewards/frontier_coverage_10": 0.11705171167850495,
"rewards/frontier_coverage_15": 0.09946493953466415,
"rewards/frontier_coverage_20": 0.07714821100234985,
"rewards/frontier_coverage_25": 0.06873356178402901,
"rewards/frontier_coverage_5": 0.12328074276447296,
"rewards/frontier_ece_reward": 0.00414937473833561,
"signal/accuracy_reward/centered_abs_mean": 0.08248291015625,
"signal/accuracy_reward/group_std_mean": 0.11040505021810532,
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.041241455078125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.041241455078125,
"signal/advantage_abs_mean": 0.051580803096294404,
"signal/advantage_pre_scale_abs_mean": 0.051580803096294404,
"signal/advantage_pre_scale_std": 0.10050711333751679,
"signal/advantage_std": 0.10050711333751679,
"signal/brier_reward/centered_abs_mean": 0.11054162234067917,
"signal/brier_reward/group_std_mean": 0.14409123212099076,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013817702792584896,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013817702792584896,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02160814180970192,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02840392105281353,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00270101772621274,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00270101772621274,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004394466057419777,
"signal/frontier_aurc_reward/group_std_mean": 0.007578754145652056,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.866094238124788e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.866094238124788e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1325658917427063,
"signal/frontier_coverage_1/group_std_mean": 0.17441392540931702,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023729294305667283,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023729294305667283,
"signal/frontier_coverage_10/centered_abs_mean": 0.11937666237354279,
"signal/frontier_coverage_10/group_std_mean": 0.15744740068912505,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002136842184700072,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002136842184700072,
"signal/frontier_coverage_15/centered_abs_mean": 0.09750326424837112,
"signal/frontier_coverage_15/group_std_mean": 0.12895927131175994,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017453083768486977,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017453083768486977,
"signal/frontier_coverage_20/centered_abs_mean": 0.06711127907037735,
"signal/frontier_coverage_20/group_std_mean": 0.08834214359521866,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001201291847974062,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001201291847974062,
"signal/frontier_coverage_25/centered_abs_mean": 0.05732980817556381,
"signal/frontier_coverage_25/group_std_mean": 0.07379360496997833,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010262035531923175,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010262035531923175,
"signal/frontier_coverage_5/centered_abs_mean": 0.130748051404953,
"signal/frontier_coverage_5/group_std_mean": 0.17201564610004424,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002340390020981431,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002340390020981431,
"signal/frontier_ece_reward/centered_abs_mean": 0.005427785962820053,
"signal/frontier_ece_reward/group_std_mean": 0.007156200148165226,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006784732453525066,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006784732453525066,
"step": 235
},
{
"calibration/aurc": 0.2680052403381016,
"calibration/batch_distribution_entropy": 0.9573398018322792,
"calibration/buffer_distribution_entropy": 0.9519497378308387,
"calibration/confidence_entropy": 0.4460752107004281,
"calibration/coverage@0%": 0.027352923189823873,
"calibration/coverage@1%": 0.06407167318982387,
"calibration/coverage@10%": 0.2875634478962818,
"calibration/coverage@15%": 0.3614068615459883,
"calibration/coverage@20%": 0.440722082925636,
"calibration/coverage@25%": 0.495826198630137,
"calibration/coverage@30%": 0.5575671171722114,
"calibration/coverage@5%": 0.1629105002446184,
"calibration/ece": 0.1554626040746608,
"calibration/mean_confidence": 0.5443639516897113,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 514.6,
"completions/max_terminated_length": 514.6,
"completions/mean_length": 206.78037109375,
"completions/mean_terminated_length": 206.82103271484374,
"completions/min_length": 55.6,
"completions/min_terminated_length": 95.0,
"epoch": 0.768,
"grad_norm": 0.0009317373624071479,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 808048401.0,
"reward": 0.9914386510848999,
"reward_std": 0.07168597877025604,
"rewards/accuracy_reward": 0.51220703125,
"rewards/brier_reward": 0.8089797616004943,
"rewards/confidence_uniqueness_reward": 0.9535501837730408,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.004470060393214226,
"rewards/frontier_coverage_1": 0.1624919980764389,
"rewards/frontier_coverage_10": 0.15464245676994323,
"rewards/frontier_coverage_15": 0.13659146577119827,
"rewards/frontier_coverage_20": 0.11587388515472412,
"rewards/frontier_coverage_25": 0.08935976326465607,
"rewards/frontier_coverage_5": 0.16063615083694457,
"rewards/frontier_ece_reward": 0.00459696571342647,
"signal/accuracy_reward/centered_abs_mean": 0.084478759765625,
"signal/accuracy_reward/group_std_mean": 0.11444791853427887,
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0422393798828125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0422393798828125,
"signal/advantage_abs_mean": 0.05375881567597389,
"signal/advantage_pre_scale_abs_mean": 0.05375881567597389,
"signal/advantage_pre_scale_std": 0.10270356833934784,
"signal/advantage_std": 0.10270356833934784,
"signal/brier_reward/centered_abs_mean": 0.1144163504242897,
"signal/brier_reward/group_std_mean": 0.14760856330394745,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014302043803036213,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014302043803036213,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020865751802921294,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02727658823132515,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026082189753651617,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026082189753651617,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003939648577943444,
"signal/frontier_aurc_reward/group_std_mean": 0.006472258921712637,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.051970751490444e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.051970751490444e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1397405296564102,
"signal/frontier_coverage_1/group_std_mean": 0.18141476809978485,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025013554841279984,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025013554841279984,
"signal/frontier_coverage_10/centered_abs_mean": 0.12998889088630677,
"signal/frontier_coverage_10/group_std_mean": 0.16883896589279174,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002326801046729088,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002326801046729088,
"signal/frontier_coverage_15/centered_abs_mean": 0.10919785648584365,
"signal/frontier_coverage_15/group_std_mean": 0.14220541715621948,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001954641635529697,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001954641635529697,
"signal/frontier_coverage_20/centered_abs_mean": 0.08695479780435562,
"signal/frontier_coverage_20/group_std_mean": 0.11316211223602295,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015564908506348729,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015564908506348729,
"signal/frontier_coverage_25/centered_abs_mean": 0.06411130949854851,
"signal/frontier_coverage_25/group_std_mean": 0.08257153034210205,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011475923703983427,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011475923703983427,
"signal/frontier_coverage_5/centered_abs_mean": 0.13725003153085708,
"signal/frontier_coverage_5/group_std_mean": 0.17820312082767487,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024567753542214634,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024567753542214634,
"signal/frontier_ece_reward/centered_abs_mean": 0.00509942676872015,
"signal/frontier_ece_reward/group_std_mean": 0.006556581333279609,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006374283460900187,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006374283460900187,
"step": 240
},
{
"calibration/aurc": 0.31887326727492027,
"calibration/batch_distribution_entropy": 0.8982541815262202,
"calibration/buffer_distribution_entropy": 0.9546690565896577,
"calibration/confidence_entropy": 0.3898735935268853,
"calibration/coverage@0%": 0.034765625,
"calibration/coverage@1%": 0.060546875,
"calibration/coverage@10%": 0.1390625,
"calibration/coverage@15%": 0.206640625,
"calibration/coverage@20%": 0.287890625,
"calibration/coverage@25%": 0.398828125,
"calibration/coverage@30%": 0.48046875,
"calibration/coverage@5%": 0.119140625,
"calibration/ece": 0.17711883435089865,
"calibration/mean_confidence": 0.5617744832093632,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 497.4,
"completions/max_terminated_length": 497.4,
"completions/mean_length": 209.9095703125,
"completions/mean_terminated_length": 209.9095703125,
"completions/min_length": 93.4,
"completions/min_terminated_length": 93.4,
"epoch": 0.784,
"grad_norm": 0.0009413471561856568,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 825372243.0,
"reward": 1.0024082660675049,
"reward_std": 0.07197575569152832,
"rewards/accuracy_reward": 0.5474609375,
"rewards/brier_reward": 0.7850143671035766,
"rewards/confidence_uniqueness_reward": 0.9518775939941406,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.004796002665534616,
"rewards/frontier_coverage_1": 0.12093576937913894,
"rewards/frontier_coverage_10": 0.11836400926113129,
"rewards/frontier_coverage_15": 0.10636832416057587,
"rewards/frontier_coverage_20": 0.08585690557956696,
"rewards/frontier_coverage_25": 0.06992573365569114,
"rewards/frontier_coverage_5": 0.12120040059089661,
"rewards/frontier_ece_reward": 0.004053607257083059,
"signal/accuracy_reward/centered_abs_mean": 0.09532470703125,
"signal/accuracy_reward/group_std_mean": 0.12385913580656052,
"signal/accuracy_reward/group_zero_std_frac": 0.65,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047662353515625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.047662353515625,
"signal/advantage_abs_mean": 0.055704934149980546,
"signal/advantage_pre_scale_abs_mean": 0.055704934149980546,
"signal/advantage_pre_scale_std": 0.10282771587371826,
"signal/advantage_std": 0.10282771587371826,
"signal/brier_reward/centered_abs_mean": 0.12030295431613922,
"signal/brier_reward/group_std_mean": 0.1544749230146408,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015037869289517402,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015037869289517402,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021519970893859864,
"signal/confidence_uniqueness_reward/group_std_mean": 0.027513662725687026,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002689996361732483,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002689996361732483,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004173616506159305,
"signal/frontier_aurc_reward/group_std_mean": 0.006660338584333658,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.470773343811742e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.470773343811742e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14796494394540788,
"signal/frontier_coverage_1/group_std_mean": 0.19138481914997102,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002648572437465191,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002648572437465191,
"signal/frontier_coverage_10/centered_abs_mean": 0.1446523040533066,
"signal/frontier_coverage_10/group_std_mean": 0.1870588093996048,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025892761070281265,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025892761070281265,
"signal/frontier_coverage_15/centered_abs_mean": 0.12852992713451386,
"signal/frontier_coverage_15/group_std_mean": 0.16658880710601806,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002300685690715909,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002300685690715909,
"signal/frontier_coverage_20/centered_abs_mean": 0.09782901257276536,
"signal/frontier_coverage_20/group_std_mean": 0.12701622247695923,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017511392710730434,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017511392710730434,
"signal/frontier_coverage_25/centered_abs_mean": 0.06910897642374039,
"signal/frontier_coverage_25/group_std_mean": 0.08850326985120774,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012370506301522255,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012370506301522255,
"signal/frontier_coverage_5/centered_abs_mean": 0.1476401075720787,
"signal/frontier_coverage_5/group_std_mean": 0.1909413605928421,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026427579112350942,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026427579112350942,
"signal/frontier_ece_reward/centered_abs_mean": 0.0046972375828772785,
"signal/frontier_ece_reward/group_std_mean": 0.006057073548436165,
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005871546978596598,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005871546978596598,
"step": 245
},
{
"calibration/aurc": 0.21368478004389746,
"calibration/batch_distribution_entropy": 0.9310709167649789,
"calibration/buffer_distribution_entropy": 0.9377174764554581,
"calibration/confidence_entropy": 0.41769588237193833,
"calibration/coverage@0%": 0.012515288649706458,
"calibration/coverage@1%": 0.012515288649706458,
"calibration/coverage@10%": 0.2700090203033268,
"calibration/coverage@15%": 0.44392429060665356,
"calibration/coverage@20%": 0.56352969055773,
"calibration/coverage@25%": 0.6412984650195694,
"calibration/coverage@30%": 0.7346952972113503,
"calibration/coverage@5%": 0.057463918786692755,
"calibration/ece": 0.09516806292580356,
"calibration/mean_confidence": 0.5285699362658447,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 487.8,
"completions/max_terminated_length": 487.8,
"completions/mean_length": 210.52841796875,
"completions/mean_terminated_length": 210.5489288330078,
"completions/min_length": 84.0,
"completions/min_terminated_length": 102.2,
"epoch": 0.8,
"grad_norm": 0.0010132838506251574,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 842538614.0,
"reward": 1.0227719783782958,
"reward_std": 0.067044947296381,
"rewards/accuracy_reward": 0.5787109375,
"rewards/brier_reward": 0.8152576923370362,
"rewards/confidence_uniqueness_reward": 0.951277756690979,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.004456493817269802,
"rewards/frontier_coverage_1": 0.1280773937702179,
"rewards/frontier_coverage_10": 0.12541920095682144,
"rewards/frontier_coverage_15": 0.11543396264314651,
"rewards/frontier_coverage_20": 0.08869966715574265,
"rewards/frontier_coverage_25": 0.07793186828494073,
"rewards/frontier_coverage_5": 0.1280773937702179,
"rewards/frontier_ece_reward": 0.006791994534432888,
"signal/accuracy_reward/centered_abs_mean": 0.089111328125,
"signal/accuracy_reward/group_std_mean": 0.11519538462162018,
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0445556640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0445556640625,
"signal/advantage_abs_mean": 0.05169298425316811,
"signal/advantage_pre_scale_abs_mean": 0.05169298425316811,
"signal/advantage_pre_scale_std": 0.100075463950634,
"signal/advantage_std": 0.100075463950634,
"signal/brier_reward/centered_abs_mean": 0.10513882040977478,
"signal/brier_reward/group_std_mean": 0.13690476417541503,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013142352551221847,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013142352551221847,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02243536040186882,
"signal/confidence_uniqueness_reward/group_std_mean": 0.029169962182641028,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028044200502336026,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028044200502336026,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0038931835908442734,
"signal/frontier_aurc_reward/group_std_mean": 0.006541755422949791,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.968798334128223e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.968798334128223e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13467796444892882,
"signal/frontier_coverage_1/group_std_mean": 0.17574736773967742,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024107354693114758,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024107354693114758,
"signal/frontier_coverage_10/centered_abs_mean": 0.13169972747564315,
"signal/frontier_coverage_10/group_std_mean": 0.17194417715072632,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002357425168156624,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002357425168156624,
"signal/frontier_coverage_15/centered_abs_mean": 0.11893990039825439,
"signal/frontier_coverage_15/group_std_mean": 0.15566185712814332,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021290241740643976,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021290241740643976,
"signal/frontier_coverage_20/centered_abs_mean": 0.09137549400329589,
"signal/frontier_coverage_20/group_std_mean": 0.11950999796390534,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016356213251128792,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016356213251128792,
"signal/frontier_coverage_25/centered_abs_mean": 0.07669355273246765,
"signal/frontier_coverage_25/group_std_mean": 0.09928402006626129,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013728145277127624,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013728145277127624,
"signal/frontier_coverage_5/centered_abs_mean": 0.13467796444892882,
"signal/frontier_coverage_5/group_std_mean": 0.17574736773967742,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024107354693114758,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024107354693114758,
"signal/frontier_ece_reward/centered_abs_mean": 0.006933884229511023,
"signal/frontier_ece_reward/group_std_mean": 0.008978551905602216,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008667355286888778,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008667355286888778,
"step": 250
},
{
"epoch": 0.8,
"eval_calibration/aurc": 0.4541931873160891,
"eval_calibration/batch_distribution_entropy": 0.898303770535573,
"eval_calibration/buffer_distribution_entropy": 0.9306479081801373,
"eval_calibration/confidence_entropy": 0.4158571344448679,
"eval_calibration/coverage@0%": 0.0546875,
"eval_calibration/coverage@1%": 0.0546875,
"eval_calibration/coverage@10%": 0.0546875,
"eval_calibration/coverage@15%": 0.140625,
"eval_calibration/coverage@20%": 0.1796875,
"eval_calibration/coverage@25%": 0.28125,
"eval_calibration/coverage@30%": 0.34375,
"eval_calibration/coverage@5%": 0.0546875,
"eval_calibration/ece": 0.21719234240451588,
"eval_calibration/mean_confidence": 0.47853388727403584,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 371.0,
"eval_completions/max_terminated_length": 371.0,
"eval_completions/mean_length": 213.93096542358398,
"eval_completions/mean_terminated_length": 213.93096542358398,
"eval_completions/min_length": 119.75,
"eval_completions/min_terminated_length": 119.75,
"eval_loss": 0.0,
"eval_num_tokens": 842538614.0,
"eval_reward": 0.9405814409255981,
"eval_reward_std": 0.2327469326555729,
"eval_rewards/accuracy_reward": 0.41796875,
"eval_rewards/brier_reward": 0.7931023389101028,
"eval_rewards/confidence_uniqueness_reward": 0.89990234375,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.005058060982264578,
"eval_rewards/frontier_coverage_1": 0.21957293897867203,
"eval_rewards/frontier_coverage_10": 0.21819716319441795,
"eval_rewards/frontier_coverage_15": 0.20080500468611717,
"eval_rewards/frontier_coverage_20": 0.1389434989541769,
"eval_rewards/frontier_coverage_25": 0.07939251139760017,
"eval_rewards/frontier_coverage_5": 0.21957293897867203,
"eval_rewards/frontier_ece_reward": 0.006343576009385288,
"eval_runtime": 19.0686,
"eval_samples_per_second": 26.221,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4697265625,
"eval_signal/accuracy_reward/group_std_mean": 0.4920807480812073,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23486328125,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23486328125,
"eval_signal/advantage_abs_mean": 0.2141471654176712,
"eval_signal/advantage_pre_scale_abs_mean": 0.2141471654176712,
"eval_signal/advantage_pre_scale_std": 0.23031814768910408,
"eval_signal/advantage_std": 0.23031814768910408,
"eval_signal/brier_reward/centered_abs_mean": 0.22076890245079994,
"eval_signal/brier_reward/group_std_mean": 0.27624931931495667,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027596112806349993,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.027596112806349993,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0436859130859375,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.051504092290997505,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0054607391357421875,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0054607391357421875,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.006361409788951278,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.012974364450201392,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00011386923142708838,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00011386923142708838,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.37149525433778763,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4566838666796684,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006649764953181148,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006649764953181148,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.36934657394886017,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4541628435254097,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006611303542740643,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006611303542740643,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3406582325696945,
"eval_signal/frontier_coverage_15/group_std_mean": 0.4199391156435013,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006097782286815345,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006097782286815345,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.23070517927408218,
"eval_signal/frontier_coverage_20/group_std_mean": 0.29086893051862717,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004129622713662684,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004129622713662684,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.13228562474250793,
"eval_signal/frontier_coverage_25/group_std_mean": 0.17096582800149918,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002367912617046386,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002367912617046386,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.37149525433778763,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4566838666796684,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006649764953181148,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006649764953181148,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.010944002773612738,
"eval_signal/frontier_ece_reward/group_std_mean": 0.014352588215842843,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013680003467015922,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013680003467015922,
"eval_steps_per_second": 0.21,
"step": 250
},
{
"calibration/aurc": 0.23105947403272936,
"calibration/batch_distribution_entropy": 0.8903880020915997,
"calibration/buffer_distribution_entropy": 0.9303524869671325,
"calibration/confidence_entropy": 0.3932704495727931,
"calibration/coverage@0%": 0.027734375,
"calibration/coverage@1%": 0.027734375,
"calibration/coverage@10%": 0.176171875,
"calibration/coverage@15%": 0.253515625,
"calibration/coverage@20%": 0.54609375,
"calibration/coverage@25%": 0.654296875,
"calibration/coverage@30%": 0.78125,
"calibration/coverage@5%": 0.074609375,
"calibration/ece": 0.14226193228141504,
"calibration/mean_confidence": 0.5599222824470196,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 446.8,
"completions/max_terminated_length": 446.8,
"completions/mean_length": 209.37236328125,
"completions/mean_terminated_length": 209.41301574707032,
"completions/min_length": 61.8,
"completions/min_terminated_length": 105.0,
"epoch": 0.816,
"grad_norm": 0.001042038551531732,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 859781755.0,
"reward": 1.0190700531005858,
"reward_std": 0.0696477398276329,
"rewards/accuracy_reward": 0.580078125,
"rewards/brier_reward": 0.7954442024230957,
"rewards/confidence_uniqueness_reward": 0.9529472947120666,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.004087340971454978,
"rewards/frontier_coverage_1": 0.10427137315273285,
"rewards/frontier_coverage_10": 0.10348986387252808,
"rewards/frontier_coverage_15": 0.1007502630352974,
"rewards/frontier_coverage_20": 0.08622306734323501,
"rewards/frontier_coverage_25": 0.06525748372077941,
"rewards/frontier_coverage_5": 0.10427137315273285,
"rewards/frontier_ece_reward": 0.004811486881226301,
"signal/accuracy_reward/centered_abs_mean": 0.08836669921875,
"signal/accuracy_reward/group_std_mean": 0.11848579794168472,
"signal/accuracy_reward/group_zero_std_frac": 0.65,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044183349609375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044183349609375,
"signal/advantage_abs_mean": 0.05248023942112923,
"signal/advantage_pre_scale_abs_mean": 0.05248023942112923,
"signal/advantage_pre_scale_std": 0.0997240498661995,
"signal/advantage_std": 0.0997240498661995,
"signal/brier_reward/centered_abs_mean": 0.11835647821426391,
"signal/brier_reward/group_std_mean": 0.15209407210350037,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01479455977678299,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01479455977678299,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02083406373858452,
"signal/confidence_uniqueness_reward/group_std_mean": 0.027383436262607575,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002604257967323065,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002604257967323065,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003906595194712282,
"signal/frontier_aurc_reward/group_std_mean": 0.006807331927120686,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.992805283516646e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.992805283516646e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14483171850442886,
"signal/frontier_coverage_1/group_std_mean": 0.18818309307098388,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002592487493529916,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002592487493529916,
"signal/frontier_coverage_10/centered_abs_mean": 0.14272255897521974,
"signal/frontier_coverage_10/group_std_mean": 0.1854997307062149,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002554733632132411,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002554733632132411,
"signal/frontier_coverage_15/centered_abs_mean": 0.134463931620121,
"signal/frontier_coverage_15/group_std_mean": 0.17486582398414613,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024069042410701513,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024069042410701513,
"signal/frontier_coverage_20/centered_abs_mean": 0.10605286359786988,
"signal/frontier_coverage_20/group_std_mean": 0.13851355910301208,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018983461894094944,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018983461894094944,
"signal/frontier_coverage_25/centered_abs_mean": 0.06077362969517708,
"signal/frontier_coverage_25/group_std_mean": 0.07842166125774383,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010878479108214379,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010878479108214379,
"signal/frontier_coverage_5/centered_abs_mean": 0.14483171850442886,
"signal/frontier_coverage_5/group_std_mean": 0.18818309307098388,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002592487493529916,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002592487493529916,
"signal/frontier_ece_reward/centered_abs_mean": 0.006092134676873684,
"signal/frontier_ece_reward/group_std_mean": 0.007921470142900944,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007615168346092105,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007615168346092105,
"step": 255
},
{
"calibration/aurc": 0.2737263926215349,
"calibration/batch_distribution_entropy": 0.9299605663645363,
"calibration/buffer_distribution_entropy": 0.9347806146762228,
"calibration/confidence_entropy": 0.42786286753485747,
"calibration/coverage@0%": 0.00859910102739726,
"calibration/coverage@1%": 0.00859910102739726,
"calibration/coverage@10%": 0.2626238380626223,
"calibration/coverage@15%": 0.3200655883072407,
"calibration/coverage@20%": 0.3603144875244618,
"calibration/coverage@25%": 0.4224414444716243,
"calibration/coverage@30%": 0.5310481898238748,
"calibration/coverage@5%": 0.1954057607632094,
"calibration/ece": 0.11588180823289287,
"calibration/mean_confidence": 0.48361126559001255,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 515.6,
"completions/max_terminated_length": 515.6,
"completions/mean_length": 215.35517578125,
"completions/mean_terminated_length": 215.35517578125,
"completions/min_length": 103.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.832,
"grad_norm": 0.0009220438660122454,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 876995344.0,
"reward": 1.0127004027366637,
"reward_std": 0.06446155533194542,
"rewards/accuracy_reward": 0.555859375,
"rewards/brier_reward": 0.8200256705284119,
"rewards/confidence_uniqueness_reward": 0.9512583017349243,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.003108630236238241,
"rewards/frontier_coverage_1": 0.14925569593906401,
"rewards/frontier_coverage_10": 0.14128147214651107,
"rewards/frontier_coverage_15": 0.12197073251008987,
"rewards/frontier_coverage_20": 0.08313180804252625,
"rewards/frontier_coverage_25": 0.07981288433074951,
"rewards/frontier_coverage_5": 0.14834679663181305,
"rewards/frontier_ece_reward": 0.0040693370625376705,
"signal/accuracy_reward/centered_abs_mean": 0.08602294921875,
"signal/accuracy_reward/group_std_mean": 0.11624790281057358,
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.043011474609375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.043011474609375,
"signal/advantage_abs_mean": 0.048256377875804904,
"signal/advantage_pre_scale_abs_mean": 0.048256377875804904,
"signal/advantage_pre_scale_std": 0.0964440256357193,
"signal/advantage_std": 0.0964440256357193,
"signal/brier_reward/centered_abs_mean": 0.09866239726543427,
"signal/brier_reward/group_std_mean": 0.12830351293087006,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012332799658179284,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012332799658179284,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02103927619755268,
"signal/confidence_uniqueness_reward/group_std_mean": 0.027020253613591193,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002629909524694085,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002629909524694085,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002520043752156198,
"signal/frontier_aurc_reward/group_std_mean": 0.004356763791292906,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5108782796887684e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5108782796887684e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13632754236459732,
"signal/frontier_coverage_1/group_std_mean": 0.17820183634757997,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024402629118412734,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024402629118412734,
"signal/frontier_coverage_10/centered_abs_mean": 0.12932903915643693,
"signal/frontier_coverage_10/group_std_mean": 0.16921607851982118,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023149897810071708,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023149897810071708,
"signal/frontier_coverage_15/centered_abs_mean": 0.11077270209789276,
"signal/frontier_coverage_15/group_std_mean": 0.1450467199087143,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00198283139616251,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00198283139616251,
"signal/frontier_coverage_20/centered_abs_mean": 0.06543788611888886,
"signal/frontier_coverage_20/group_std_mean": 0.08514793068170548,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011713381623849272,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011713381623849272,
"signal/frontier_coverage_25/centered_abs_mean": 0.055297840386629105,
"signal/frontier_coverage_25/group_std_mean": 0.07087110131978988,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009898313088342547,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009898313088342547,
"signal/frontier_coverage_5/centered_abs_mean": 0.1358020082116127,
"signal/frontier_coverage_5/group_std_mean": 0.1775294780731201,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024308559019118547,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024308559019118547,
"signal/frontier_ece_reward/centered_abs_mean": 0.005449410527944565,
"signal/frontier_ece_reward/group_std_mean": 0.00702635021880269,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006811763159930706,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006811763159930706,
"step": 260
},
{
"calibration/aurc": 0.29199818417946016,
"calibration/batch_distribution_entropy": 0.9449162561157868,
"calibration/buffer_distribution_entropy": 0.9576473851799351,
"calibration/confidence_entropy": 0.4540747092018599,
"calibration/coverage@0%": 0.030078125,
"calibration/coverage@1%": 0.046484375,
"calibration/coverage@10%": 0.21015625,
"calibration/coverage@15%": 0.30859375,
"calibration/coverage@20%": 0.431640625,
"calibration/coverage@25%": 0.488671875,
"calibration/coverage@30%": 0.576171875,
"calibration/coverage@5%": 0.158203125,
"calibration/ece": 0.14679800876078744,
"calibration/mean_confidence": 0.5572934415892845,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 503.2,
"completions/max_terminated_length": 503.2,
"completions/mean_length": 218.5626953125,
"completions/mean_terminated_length": 218.5626953125,
"completions/min_length": 107.6,
"completions/min_terminated_length": 107.6,
"epoch": 0.848,
"grad_norm": 0.0009493394172750413,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 894247794.0,
"reward": 0.9997729182243347,
"reward_std": 0.06255611553788185,
"rewards/accuracy_reward": 0.5322265625,
"rewards/brier_reward": 0.809646463394165,
"rewards/confidence_uniqueness_reward": 0.95616455078125,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.004203257244080305,
"rewards/frontier_coverage_1": 0.14884999692440032,
"rewards/frontier_coverage_10": 0.13910053074359893,
"rewards/frontier_coverage_15": 0.11337107419967651,
"rewards/frontier_coverage_20": 0.08508162200450897,
"rewards/frontier_coverage_25": 0.06492899954319001,
"rewards/frontier_coverage_5": 0.147222076356411,
"rewards/frontier_ece_reward": 0.004035304859280586,
"signal/accuracy_reward/centered_abs_mean": 0.07576904296875,
"signal/accuracy_reward/group_std_mean": 0.10509671717882156,
"signal/accuracy_reward/group_zero_std_frac": 0.684375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.037884521484375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.037884521484375,
"signal/advantage_abs_mean": 0.046570013463497165,
"signal/advantage_pre_scale_abs_mean": 0.046570013463497165,
"signal/advantage_pre_scale_std": 0.0919475108385086,
"signal/advantage_std": 0.0919475108385086,
"signal/brier_reward/centered_abs_mean": 0.10335270017385483,
"signal/brier_reward/group_std_mean": 0.1338736593723297,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012919087521731853,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012919087521731853,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019035768508911134,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02395063266158104,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023794710636138917,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023794710636138917,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003562123980373144,
"signal/frontier_aurc_reward/group_std_mean": 0.006216035131365061,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.376201563398353e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.376201563398353e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1347525641322136,
"signal/frontier_coverage_1/group_std_mean": 0.1769658923149109,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024120708461850882,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024120708461850882,
"signal/frontier_coverage_10/centered_abs_mean": 0.12726502120494843,
"signal/frontier_coverage_10/group_std_mean": 0.16702594459056855,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022780438885092735,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022780438885092735,
"signal/frontier_coverage_15/centered_abs_mean": 0.10727915167808533,
"signal/frontier_coverage_15/group_std_mean": 0.14058453142642974,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001920296740718186,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001920296740718186,
"signal/frontier_coverage_20/centered_abs_mean": 0.07309994548559189,
"signal/frontier_coverage_20/group_std_mean": 0.09628810286521912,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013084889855235816,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013084889855235816,
"signal/frontier_coverage_25/centered_abs_mean": 0.052524034678936,
"signal/frontier_coverage_25/group_std_mean": 0.06742269843816757,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009401801857165992,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009401801857165992,
"signal/frontier_coverage_5/centered_abs_mean": 0.1335119917988777,
"signal/frontier_coverage_5/group_std_mean": 0.1753005027770996,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002389864530414343,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002389864530414343,
"signal/frontier_ece_reward/centered_abs_mean": 0.006145635060966015,
"signal/frontier_ece_reward/group_std_mean": 0.007988576404750347,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007682043826207519,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007682043826207519,
"step": 265
},
{
"calibration/aurc": 0.2910564013391039,
"calibration/batch_distribution_entropy": 0.9253104409605208,
"calibration/buffer_distribution_entropy": 0.9685836600756307,
"calibration/confidence_entropy": 0.46880515821232116,
"calibration/coverage@0%": 0.006640625,
"calibration/coverage@1%": 0.006640625,
"calibration/coverage@10%": 0.13984375,
"calibration/coverage@15%": 0.2015625,
"calibration/coverage@20%": 0.348828125,
"calibration/coverage@25%": 0.41640625,
"calibration/coverage@30%": 0.465625,
"calibration/coverage@5%": 0.072265625,
"calibration/ece": 0.14704862560796675,
"calibration/mean_confidence": 0.6076630519058775,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00068359375,
"completions/max_length": 624.0,
"completions/max_terminated_length": 624.0,
"completions/mean_length": 227.85029296875,
"completions/mean_terminated_length": 228.00685119628906,
"completions/min_length": 67.6,
"completions/min_terminated_length": 114.4,
"epoch": 0.864,
"grad_norm": 0.0008776676841080189,
"learning_rate": 1e-06,
"loss": -0.0006,
"num_tokens": 911567797.0,
"reward": 1.0216901302337646,
"reward_std": 0.06968635767698288,
"rewards/accuracy_reward": 0.58603515625,
"rewards/brier_reward": 0.8016023874282837,
"rewards/confidence_uniqueness_reward": 0.954932713508606,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.004055592557415366,
"rewards/frontier_coverage_1": 0.10094125419855118,
"rewards/frontier_coverage_10": 0.09590005427598954,
"rewards/frontier_coverage_15": 0.08516524583101273,
"rewards/frontier_coverage_20": 0.06728310883045197,
"rewards/frontier_coverage_25": 0.06532372683286666,
"rewards/frontier_coverage_5": 0.1006399393081665,
"rewards/frontier_ece_reward": 0.0023761557880789042,
"signal/accuracy_reward/centered_abs_mean": 0.093060302734375,
"signal/accuracy_reward/group_std_mean": 0.12451760470867157,
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0465301513671875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0465301513671875,
"signal/advantage_abs_mean": 0.05201050192117691,
"signal/advantage_pre_scale_abs_mean": 0.05201050192117691,
"signal/advantage_pre_scale_std": 0.09984122812747956,
"signal/advantage_std": 0.09984122812747956,
"signal/brier_reward/centered_abs_mean": 0.11012591570615768,
"signal/brier_reward/group_std_mean": 0.14079957008361815,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01376573946326971,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01376573946326971,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020087697356939316,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02534872628748417,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025109621696174146,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025109621696174146,
"signal/format_reward/centered_abs_mean": 0.001141357421875,
"signal/format_reward/group_std_mean": 0.0017916702199727297,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005706787109375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0005706787109375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036198759451508524,
"signal/frontier_aurc_reward/group_std_mean": 0.006112173385918141,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.479577859863639e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.479577859863639e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14493168145418167,
"signal/frontier_coverage_1/group_std_mean": 0.18660034835338593,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025942770298570395,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025942770298570395,
"signal/frontier_coverage_10/centered_abs_mean": 0.1331159457564354,
"signal/frontier_coverage_10/group_std_mean": 0.17127929031848907,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023827753495424984,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023827753495424984,
"signal/frontier_coverage_15/centered_abs_mean": 0.1076448142528534,
"signal/frontier_coverage_15/group_std_mean": 0.1388661742210388,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019268421223387123,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019268421223387123,
"signal/frontier_coverage_20/centered_abs_mean": 0.07579494565725327,
"signal/frontier_coverage_20/group_std_mean": 0.09808354526758194,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013567295158281923,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013567295158281923,
"signal/frontier_coverage_25/centered_abs_mean": 0.058327066153287886,
"signal/frontier_coverage_25/group_std_mean": 0.0748221144080162,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010440544574521482,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010440544574521482,
"signal/frontier_coverage_5/centered_abs_mean": 0.14447592794895173,
"signal/frontier_coverage_5/group_std_mean": 0.18601977527141572,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00258611892350018,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00258611892350018,
"signal/frontier_ece_reward/centered_abs_mean": 0.005081780906766653,
"signal/frontier_ece_reward/group_std_mean": 0.006655264738947153,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006352226133458317,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006352226133458317,
"step": 270
},
{
"calibration/aurc": 0.345973101674254,
"calibration/batch_distribution_entropy": 0.9493667014431317,
"calibration/buffer_distribution_entropy": 0.9578349785610758,
"calibration/confidence_entropy": 0.43273207823055165,
"calibration/coverage@0%": 0.01916202910958904,
"calibration/coverage@1%": 0.01916202910958904,
"calibration/coverage@10%": 0.06800850048923679,
"calibration/coverage@15%": 0.10945144324853229,
"calibration/coverage@20%": 0.21189151174168294,
"calibration/coverage@25%": 0.29436689701565555,
"calibration/coverage@30%": 0.37721379647749514,
"calibration/coverage@5%": 0.01916202910958904,
"calibration/ece": 0.14380449822889765,
"calibration/mean_confidence": 0.5184599565507902,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 578.4,
"completions/max_terminated_length": 578.4,
"completions/mean_length": 232.13271484375,
"completions/mean_terminated_length": 232.17763061523436,
"completions/min_length": 63.2,
"completions/min_terminated_length": 106.0,
"epoch": 0.88,
"grad_norm": 0.0013488256372511387,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 929091908.0,
"reward": 0.9903743028640747,
"reward_std": 0.06995146423578262,
"rewards/accuracy_reward": 0.5111328125,
"rewards/brier_reward": 0.8095457434654236,
"rewards/confidence_uniqueness_reward": 0.9556445837020874,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.00441735852509737,
"rewards/frontier_coverage_1": 0.16093263030052185,
"rewards/frontier_coverage_10": 0.1454271823167801,
"rewards/frontier_coverage_15": 0.12577222883701325,
"rewards/frontier_coverage_20": 0.10250143557786942,
"rewards/frontier_coverage_25": 0.08574089109897613,
"rewards/frontier_coverage_5": 0.16091531813144683,
"rewards/frontier_ece_reward": 0.0028060302603989838,
"signal/accuracy_reward/centered_abs_mean": 0.0927001953125,
"signal/accuracy_reward/group_std_mean": 0.11805247217416763,
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04635009765625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04635009765625,
"signal/advantage_abs_mean": 0.05447976440191269,
"signal/advantage_pre_scale_abs_mean": 0.05447976440191269,
"signal/advantage_pre_scale_std": 0.10289558172225952,
"signal/advantage_std": 0.10289558172225952,
"signal/brier_reward/centered_abs_mean": 0.10742014944553376,
"signal/brier_reward/group_std_mean": 0.1387660324573517,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01342751868069172,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01342751868069172,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01949140876531601,
"signal/confidence_uniqueness_reward/group_std_mean": 0.025074663758277892,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024364260956645013,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024364260956645013,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003677291143685579,
"signal/frontier_aurc_reward/group_std_mean": 0.005867359507828951,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.582351052202285e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.582351052202285e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14726624488830567,
"signal/frontier_coverage_1/group_std_mean": 0.18830116987228393,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002636065660044551,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002636065660044551,
"signal/frontier_coverage_10/centered_abs_mean": 0.13290869295597077,
"signal/frontier_coverage_10/group_std_mean": 0.17024919986724854,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002379065519198775,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002379065519198775,
"signal/frontier_coverage_15/centered_abs_mean": 0.10564069151878357,
"signal/frontier_coverage_15/group_std_mean": 0.13563039898872375,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018909682519733905,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018909682519733905,
"signal/frontier_coverage_20/centered_abs_mean": 0.08119350373744964,
"signal/frontier_coverage_20/group_std_mean": 0.10403980016708374,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014533637091517448,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014533637091517448,
"signal/frontier_coverage_25/centered_abs_mean": 0.06113082841038704,
"signal/frontier_coverage_25/group_std_mean": 0.0785527378320694,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001094241812825203,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001094241812825203,
"signal/frontier_coverage_5/centered_abs_mean": 0.14714258909225464,
"signal/frontier_coverage_5/group_std_mean": 0.18813947439193726,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026338521391153337,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026338521391153337,
"signal/frontier_ece_reward/centered_abs_mean": 0.004730461305007339,
"signal/frontier_ece_reward/group_std_mean": 0.006142756808549166,
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005913076631259174,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005913076631259174,
"step": 275
},
{
"calibration/aurc": 0.35160675122175394,
"calibration/batch_distribution_entropy": 0.937352612044848,
"calibration/buffer_distribution_entropy": 0.957746533618099,
"calibration/confidence_entropy": 0.435856469595511,
"calibration/coverage@0%": 0.03711396159491194,
"calibration/coverage@1%": 0.03711396159491194,
"calibration/coverage@10%": 0.07853855797455969,
"calibration/coverage@15%": 0.12385946673189822,
"calibration/coverage@20%": 0.18948905332681018,
"calibration/coverage@25%": 0.2574578033268102,
"calibration/coverage@30%": 0.4949662120841487,
"calibration/coverage@5%": 0.04180146159491194,
"calibration/ece": 0.16426921138511993,
"calibration/mean_confidence": 0.5625332614498266,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 805.4,
"completions/max_terminated_length": 805.4,
"completions/mean_length": 237.45625,
"completions/mean_terminated_length": 237.52623291015624,
"completions/min_length": 47.4,
"completions/min_terminated_length": 117.6,
"epoch": 0.896,
"grad_norm": 0.001406333758495748,
"learning_rate": 1e-06,
"loss": -0.0,
"num_tokens": 946634308.0,
"reward": 1.0051775932312013,
"reward_std": 0.06571876630187035,
"rewards/accuracy_reward": 0.544140625,
"rewards/brier_reward": 0.8050177812576294,
"rewards/confidence_uniqueness_reward": 0.957094931602478,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.004963052179664374,
"rewards/frontier_coverage_1": 0.1328397125005722,
"rewards/frontier_coverage_10": 0.13134128004312515,
"rewards/frontier_coverage_15": 0.11960946768522263,
"rewards/frontier_coverage_20": 0.09785552620887757,
"rewards/frontier_coverage_25": 0.10049206763505936,
"rewards/frontier_coverage_5": 0.13219754993915558,
"rewards/frontier_ece_reward": 0.002726085647009313,
"signal/accuracy_reward/centered_abs_mean": 0.08485107421875,
"signal/accuracy_reward/group_std_mean": 0.11643068790435791,
"signal/accuracy_reward/group_zero_std_frac": 0.65,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042425537109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.042425537109375,
"signal/advantage_abs_mean": 0.04787022992968559,
"signal/advantage_pre_scale_abs_mean": 0.04787022992968559,
"signal/advantage_pre_scale_std": 0.0955558255314827,
"signal/advantage_std": 0.0955558255314827,
"signal/brier_reward/centered_abs_mean": 0.10153112411499024,
"signal/brier_reward/group_std_mean": 0.1315218985080719,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01269139051437378,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01269139051437378,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.018539119511842728,
"signal/confidence_uniqueness_reward/group_std_mean": 0.024511999264359475,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002317389938980341,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002317389938980341,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0035955175990238786,
"signal/frontier_aurc_reward/group_std_mean": 0.0059560602996498345,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.43597639282234e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.43597639282234e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13631070256233216,
"signal/frontier_coverage_1/group_std_mean": 0.1748459130525589,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00243996144272387,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00243996144272387,
"signal/frontier_coverage_10/centered_abs_mean": 0.1266059935092926,
"signal/frontier_coverage_10/group_std_mean": 0.1624742865562439,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022662471048533915,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022662471048533915,
"signal/frontier_coverage_15/centered_abs_mean": 0.10961523801088333,
"signal/frontier_coverage_15/group_std_mean": 0.1403743863105774,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019621126586571334,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019621126586571334,
"signal/frontier_coverage_20/centered_abs_mean": 0.0809634193778038,
"signal/frontier_coverage_20/group_std_mean": 0.10352005809545517,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014492451678961515,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014492451678961515,
"signal/frontier_coverage_25/centered_abs_mean": 0.07251047790050506,
"signal/frontier_coverage_25/group_std_mean": 0.09240870028734208,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012979375198483468,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012979375198483468,
"signal/frontier_coverage_5/centered_abs_mean": 0.13586267530918122,
"signal/frontier_coverage_5/group_std_mean": 0.17419685125350953,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024319417774677277,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024319417774677277,
"signal/frontier_ece_reward/centered_abs_mean": 0.005482800677418709,
"signal/frontier_ece_reward/group_std_mean": 0.007079447526484728,
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006853500846773386,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006853500846773386,
"step": 280
},
{
"calibration/aurc": 0.35158408632793803,
"calibration/batch_distribution_entropy": 0.9562167683424792,
"calibration/buffer_distribution_entropy": 0.9561731055344689,
"calibration/confidence_entropy": 0.44419157410479937,
"calibration/coverage@0%": 0.015234375,
"calibration/coverage@1%": 0.015234375,
"calibration/coverage@10%": 0.107421875,
"calibration/coverage@15%": 0.233203125,
"calibration/coverage@20%": 0.32421875,
"calibration/coverage@25%": 0.420703125,
"calibration/coverage@30%": 0.490234375,
"calibration/coverage@5%": 0.030078125,
"calibration/ece": 0.15778585097962264,
"calibration/mean_confidence": 0.5176856689053531,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 639.8,
"completions/max_terminated_length": 639.8,
"completions/mean_length": 239.90751953125,
"completions/mean_terminated_length": 239.97922973632814,
"completions/min_length": 95.2,
"completions/min_terminated_length": 119.4,
"epoch": 0.912,
"grad_norm": 0.0006928271031938493,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 964142257.0,
"reward": 1.005584442615509,
"reward_std": 0.06624010503292084,
"rewards/accuracy_reward": 0.54365234375,
"rewards/brier_reward": 0.8104263663291931,
"rewards/confidence_uniqueness_reward": 0.9589905261993408,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0038628154201433063,
"rewards/frontier_coverage_1": 0.13283431753516198,
"rewards/frontier_coverage_10": 0.13065023571252823,
"rewards/frontier_coverage_15": 0.11201114878058434,
"rewards/frontier_coverage_20": 0.08985281139612197,
"rewards/frontier_coverage_25": 0.0930003970861435,
"rewards/frontier_coverage_5": 0.13264696821570396,
"rewards/frontier_ece_reward": 0.003423719014972448,
"signal/accuracy_reward/centered_abs_mean": 0.080096435546875,
"signal/accuracy_reward/group_std_mean": 0.11140190660953522,
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0400482177734375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0400482177734375,
"signal/advantage_abs_mean": 0.04888551086187363,
"signal/advantage_pre_scale_abs_mean": 0.04888551086187363,
"signal/advantage_pre_scale_std": 0.09277830272912979,
"signal/advantage_std": 0.09277830272912979,
"signal/brier_reward/centered_abs_mean": 0.11297860145568847,
"signal/brier_reward/group_std_mean": 0.14475666582584382,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014122325181961059,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014122325181961059,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.017448830977082254,
"signal/confidence_uniqueness_reward/group_std_mean": 0.022930829599499703,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021811038721352817,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021811038721352817,
"signal/format_reward/centered_abs_mean": 0.000555419921875,
"signal/format_reward/group_std_mean": 0.0013209730386734009,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030974366702139376,
"signal/frontier_aurc_reward/group_std_mean": 0.005012795515358448,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.54441154235974e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.54441154235974e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1505083903670311,
"signal/frontier_coverage_1/group_std_mean": 0.19186728000640868,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002694100048393011,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002694100048393011,
"signal/frontier_coverage_10/centered_abs_mean": 0.14712896049022675,
"signal/frontier_coverage_10/group_std_mean": 0.18757612407207488,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002633608318865299,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002633608318865299,
"signal/frontier_coverage_15/centered_abs_mean": 0.1204127699136734,
"signal/frontier_coverage_15/group_std_mean": 0.15421084463596343,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002155388565734029,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002155388565734029,
"signal/frontier_coverage_20/centered_abs_mean": 0.08603039383888245,
"signal/frontier_coverage_20/group_std_mean": 0.11032881885766983,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015399440191686154,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015399440191686154,
"signal/frontier_coverage_25/centered_abs_mean": 0.07305942624807357,
"signal/frontier_coverage_25/group_std_mean": 0.09441265314817429,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013077637180685998,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013077637180685998,
"signal/frontier_coverage_5/centered_abs_mean": 0.14992099851369858,
"signal/frontier_coverage_5/group_std_mean": 0.19108366966247559,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002683585789054632,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002683585789054632,
"signal/frontier_ece_reward/centered_abs_mean": 0.005137715861201287,
"signal/frontier_ece_reward/group_std_mean": 0.006601763609796763,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006422144826501608,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006422144826501608,
"step": 285
},
{
"calibration/aurc": 0.38500850061467895,
"calibration/batch_distribution_entropy": 0.9541660627238278,
"calibration/buffer_distribution_entropy": 0.9676595332120735,
"calibration/confidence_entropy": 0.44988325108230454,
"calibration/coverage@0%": 0.00546875,
"calibration/coverage@1%": 0.00546875,
"calibration/coverage@10%": 0.01015625,
"calibration/coverage@15%": 0.019921875,
"calibration/coverage@20%": 0.0328125,
"calibration/coverage@25%": 0.14314609833659492,
"calibration/coverage@30%": 0.3260182240704501,
"calibration/coverage@5%": 0.00546875,
"calibration/ece": 0.1563020786443196,
"calibration/mean_confidence": 0.54675974784237,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 632.0,
"completions/max_terminated_length": 632.0,
"completions/mean_length": 233.184765625,
"completions/mean_terminated_length": 233.20745849609375,
"completions/min_length": 94.4,
"completions/min_terminated_length": 112.8,
"epoch": 0.928,
"grad_norm": 0.0008441190584562719,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 981556885.0,
"reward": 0.9945326924324036,
"reward_std": 0.069176334887743,
"rewards/accuracy_reward": 0.5279296875,
"rewards/brier_reward": 0.7925830125808716,
"rewards/confidence_uniqueness_reward": 0.9535414099693298,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.004847506247460842,
"rewards/frontier_coverage_1": 0.13357798159122466,
"rewards/frontier_coverage_10": 0.12652733474969863,
"rewards/frontier_coverage_15": 0.1060033068060875,
"rewards/frontier_coverage_20": 0.08794465065002441,
"rewards/frontier_coverage_25": 0.0847175195813179,
"rewards/frontier_coverage_5": 0.1330983817577362,
"rewards/frontier_ece_reward": 0.003291472140699625,
"signal/accuracy_reward/centered_abs_mean": 0.0863525390625,
"signal/accuracy_reward/group_std_mean": 0.11669287532567978,
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04317626953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04317626953125,
"signal/advantage_abs_mean": 0.05193809494376182,
"signal/advantage_pre_scale_abs_mean": 0.05193809494376182,
"signal/advantage_pre_scale_std": 0.10021101087331771,
"signal/advantage_std": 0.10021101087331771,
"signal/brier_reward/centered_abs_mean": 0.1140342727303505,
"signal/brier_reward/group_std_mean": 0.14738682210445403,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014254284091293812,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014254284091293812,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020669334381818772,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02657599709928036,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025836667977273465,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025836667977273465,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0041755520273,
"signal/frontier_aurc_reward/group_std_mean": 0.006980878114700317,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.474237863789313e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.474237863789313e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13945655822753905,
"signal/frontier_coverage_1/group_std_mean": 0.18133697807788848,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024962722789496185,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024962722789496185,
"signal/frontier_coverage_10/centered_abs_mean": 0.13074179440736772,
"signal/frontier_coverage_10/group_std_mean": 0.17016540467739105,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023402780294418334,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023402780294418334,
"signal/frontier_coverage_15/centered_abs_mean": 0.10502620935440063,
"signal/frontier_coverage_15/group_std_mean": 0.13652084916830062,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018799690529704093,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018799690529704093,
"signal/frontier_coverage_20/centered_abs_mean": 0.08000846803188325,
"signal/frontier_coverage_20/group_std_mean": 0.10377214401960373,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014321515103802086,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014321515103802086,
"signal/frontier_coverage_25/centered_abs_mean": 0.06775118708610535,
"signal/frontier_coverage_25/group_std_mean": 0.0874949112534523,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001212746207602322,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001212746207602322,
"signal/frontier_coverage_5/centered_abs_mean": 0.1389759063720703,
"signal/frontier_coverage_5/group_std_mean": 0.1806858241558075,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002487668581306934,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002487668581306934,
"signal/frontier_ece_reward/centered_abs_mean": 0.005040143057703972,
"signal/frontier_ece_reward/group_std_mean": 0.006339070200920105,
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006300178822129965,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006300178822129965,
"step": 290
},
{
"calibration/aurc": 0.24533204975535733,
"calibration/batch_distribution_entropy": 0.9452989449951105,
"calibration/buffer_distribution_entropy": 0.9759185141371107,
"calibration/confidence_entropy": 0.43589906919508004,
"calibration/coverage@0%": 0.05546875,
"calibration/coverage@1%": 0.05546875,
"calibration/coverage@10%": 0.208984375,
"calibration/coverage@15%": 0.309375,
"calibration/coverage@20%": 0.41328125,
"calibration/coverage@25%": 0.536328125,
"calibration/coverage@30%": 0.61484375,
"calibration/coverage@5%": 0.13515625,
"calibration/ece": 0.09939514506864612,
"calibration/mean_confidence": 0.5365272507343248,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 605.8,
"completions/max_terminated_length": 605.8,
"completions/mean_length": 229.26923828125,
"completions/mean_terminated_length": 229.3147430419922,
"completions/min_length": 68.2,
"completions/min_terminated_length": 111.8,
"epoch": 0.944,
"grad_norm": 0.0008980790735222399,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 998880026.0,
"reward": 1.00025976896286,
"reward_std": 0.07237638309597969,
"rewards/accuracy_reward": 0.534375,
"rewards/brier_reward": 0.8018420696258545,
"rewards/confidence_uniqueness_reward": 0.9535500884056092,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0040782707743346695,
"rewards/frontier_coverage_1": 0.14479023218154907,
"rewards/frontier_coverage_10": 0.13982656300067903,
"rewards/frontier_coverage_15": 0.1250221073627472,
"rewards/frontier_coverage_20": 0.10365102738142014,
"rewards/frontier_coverage_25": 0.07933511734008789,
"rewards/frontier_coverage_5": 0.14416193962097168,
"rewards/frontier_ece_reward": 0.005043480265885592,
"signal/accuracy_reward/centered_abs_mean": 0.10399169921875,
"signal/accuracy_reward/group_std_mean": 0.13489952236413955,
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051995849609375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.051995849609375,
"signal/advantage_abs_mean": 0.05535328909754753,
"signal/advantage_pre_scale_abs_mean": 0.05535328909754753,
"signal/advantage_pre_scale_std": 0.10389592349529267,
"signal/advantage_std": 0.10389592349529267,
"signal/brier_reward/centered_abs_mean": 0.1098005548119545,
"signal/brier_reward/group_std_mean": 0.14013067930936812,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013725069351494312,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013725069351494312,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020933642983436584,
"signal/confidence_uniqueness_reward/group_std_mean": 0.027122461423277854,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002616705372929573,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002616705372929573,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0031758068595081567,
"signal/frontier_aurc_reward/group_std_mean": 0.005115151405334473,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.684694115188904e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.684694115188904e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15270479023456573,
"signal/frontier_coverage_1/group_std_mean": 0.19418494403362274,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027334156446158888,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027334156446158888,
"signal/frontier_coverage_10/centered_abs_mean": 0.1449616074562073,
"signal/frontier_coverage_10/group_std_mean": 0.18443833589553832,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002594812633469701,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002594812633469701,
"signal/frontier_coverage_15/centered_abs_mean": 0.1284430891275406,
"signal/frontier_coverage_15/group_std_mean": 0.16366292238235475,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022991311736404894,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022991311736404894,
"signal/frontier_coverage_20/centered_abs_mean": 0.10182230472564698,
"signal/frontier_coverage_20/group_std_mean": 0.12996439933776854,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018226192332804203,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018226192332804203,
"signal/frontier_coverage_25/centered_abs_mean": 0.07012421786785125,
"signal/frontier_coverage_25/group_std_mean": 0.08924262970685959,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012552234344184398,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012552234344184398,
"signal/frontier_coverage_5/centered_abs_mean": 0.1518775552511215,
"signal/frontier_coverage_5/group_std_mean": 0.1931472510099411,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002718608174473047,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002718608174473047,
"signal/frontier_ece_reward/centered_abs_mean": 0.006178434193134308,
"signal/frontier_ece_reward/group_std_mean": 0.007841460406780243,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007723042741417885,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007723042741417885,
"step": 295
},
{
"calibration/aurc": 0.3187057921941282,
"calibration/batch_distribution_entropy": 0.9263256515577549,
"calibration/buffer_distribution_entropy": 0.9591052414404018,
"calibration/confidence_entropy": 0.4034783089101139,
"calibration/coverage@0%": 0.00234375,
"calibration/coverage@1%": 0.00234375,
"calibration/coverage@10%": 0.028125,
"calibration/coverage@15%": 0.246484375,
"calibration/coverage@20%": 0.298828125,
"calibration/coverage@25%": 0.38203125,
"calibration/coverage@30%": 0.52890625,
"calibration/coverage@5%": 0.0125,
"calibration/ece": 0.17384393923406577,
"calibration/mean_confidence": 0.5563768093603549,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00068359375,
"completions/max_length": 564.2,
"completions/max_terminated_length": 564.2,
"completions/mean_length": 224.7591796875,
"completions/mean_terminated_length": 224.9130859375,
"completions/min_length": 42.8,
"completions/min_terminated_length": 112.4,
"epoch": 0.96,
"grad_norm": 0.0007371063693426549,
"learning_rate": 1e-06,
"loss": -0.0006,
"num_tokens": 1016121880.0,
"reward": 0.9967244625091553,
"reward_std": 0.06260569468140602,
"rewards/accuracy_reward": 0.5236328125,
"rewards/brier_reward": 0.8056876301765442,
"rewards/confidence_uniqueness_reward": 0.9528008699417114,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.004861214105039835,
"rewards/frontier_coverage_1": 0.1592409610748291,
"rewards/frontier_coverage_10": 0.15207428336143494,
"rewards/frontier_coverage_15": 0.1416480913758278,
"rewards/frontier_coverage_20": 0.122801274061203,
"rewards/frontier_coverage_25": 0.09591661542654037,
"rewards/frontier_coverage_5": 0.15799247920513154,
"rewards/frontier_ece_reward": 0.005397499911487103,
"signal/accuracy_reward/centered_abs_mean": 0.08304443359375,
"signal/accuracy_reward/group_std_mean": 0.11143480986356735,
"signal/accuracy_reward/group_zero_std_frac": 0.675,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.041522216796875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.041522216796875,
"signal/advantage_abs_mean": 0.04635867327451706,
"signal/advantage_pre_scale_abs_mean": 0.04635867327451706,
"signal/advantage_pre_scale_std": 0.09322313666343689,
"signal/advantage_std": 0.09322313666343689,
"signal/brier_reward/centered_abs_mean": 0.1010708749294281,
"signal/brier_reward/group_std_mean": 0.1333750456571579,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012633859366178512,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012633859366178512,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02059360146522522,
"signal/confidence_uniqueness_reward/group_std_mean": 0.027993280440568924,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025742001831531524,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025742001831531524,
"signal/format_reward/centered_abs_mean": 0.001312255859375,
"signal/format_reward/group_std_mean": 0.0035306816454976795,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0038483137730509044,
"signal/frontier_aurc_reward/group_std_mean": 0.00625881552696228,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.888481584610417e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.888481584610417e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1346493184566498,
"signal/frontier_coverage_1/group_std_mean": 0.17746165692806243,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002410222636535764,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002410222636535764,
"signal/frontier_coverage_10/centered_abs_mean": 0.12848362624645232,
"signal/frontier_coverage_10/group_std_mean": 0.1693892151117325,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00229985686019063,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00229985686019063,
"signal/frontier_coverage_15/centered_abs_mean": 0.11797473281621933,
"signal/frontier_coverage_15/group_std_mean": 0.15566626489162444,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002111747674643993,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002111747674643993,
"signal/frontier_coverage_20/centered_abs_mean": 0.09770219922065734,
"signal/frontier_coverage_20/group_std_mean": 0.1287187471985817,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001748869358561933,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001748869358561933,
"signal/frontier_coverage_25/centered_abs_mean": 0.07094139754772186,
"signal/frontier_coverage_25/group_std_mean": 0.09213098138570786,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012698509730398656,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012698509730398656,
"signal/frontier_coverage_5/centered_abs_mean": 0.13373664319515227,
"signal/frontier_coverage_5/group_std_mean": 0.1762985348701477,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023938857018947603,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023938857018947603,
"signal/frontier_ece_reward/centered_abs_mean": 0.006078108306974172,
"signal/frontier_ece_reward/group_std_mean": 0.007710330653935671,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007597635383717715,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007597635383717715,
"step": 300
},
{
"epoch": 0.96,
"eval_calibration/aurc": 0.4099814499895668,
"eval_calibration/batch_distribution_entropy": 0.9038407620245683,
"eval_calibration/buffer_distribution_entropy": 0.945839954331275,
"eval_calibration/confidence_entropy": 0.42479212258231097,
"eval_calibration/coverage@0%": 0.0546875,
"eval_calibration/coverage@1%": 0.0546875,
"eval_calibration/coverage@10%": 0.0546875,
"eval_calibration/coverage@15%": 0.0546875,
"eval_calibration/coverage@20%": 0.1953125,
"eval_calibration/coverage@25%": 0.28125,
"eval_calibration/coverage@30%": 0.3046875,
"eval_calibration/coverage@5%": 0.0546875,
"eval_calibration/ece": 0.1974764637360075,
"eval_calibration/mean_confidence": 0.4965389637360075,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 438.0,
"eval_completions/max_terminated_length": 438.0,
"eval_completions/mean_length": 226.42355728149414,
"eval_completions/mean_terminated_length": 226.42355728149414,
"eval_completions/min_length": 125.5,
"eval_completions/min_terminated_length": 125.5,
"eval_loss": 0.0,
"eval_num_tokens": 1016121880.0,
"eval_reward": 0.956175222992897,
"eval_reward_std": 0.23227940499782562,
"eval_rewards/accuracy_reward": 0.44921875,
"eval_rewards/brier_reward": 0.8000754117965698,
"eval_rewards/confidence_uniqueness_reward": 0.9013671875,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.004403327591717243,
"eval_rewards/frontier_coverage_1": 0.2066951021552086,
"eval_rewards/frontier_coverage_10": 0.19859884679317474,
"eval_rewards/frontier_coverage_15": 0.18650996312499046,
"eval_rewards/frontier_coverage_20": 0.13811534643173218,
"eval_rewards/frontier_coverage_25": 0.09158709645271301,
"eval_rewards/frontier_coverage_5": 0.20559290051460266,
"eval_rewards/frontier_ece_reward": 0.004634097334928811,
"eval_runtime": 21.3965,
"eval_samples_per_second": 23.368,
"eval_signal/accuracy_reward/centered_abs_mean": 0.485595703125,
"eval_signal/accuracy_reward/group_std_mean": 0.5005338564515114,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2427978515625,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2427978515625,
"eval_signal/advantage_abs_mean": 0.21783225610852242,
"eval_signal/advantage_pre_scale_abs_mean": 0.21783225610852242,
"eval_signal/advantage_pre_scale_std": 0.22964715585112572,
"eval_signal/advantage_std": 0.22964715585112572,
"eval_signal/brier_reward/centered_abs_mean": 0.21054896339774132,
"eval_signal/brier_reward/group_std_mean": 0.2618413344025612,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026318620424717665,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.026318620424717665,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0418243408203125,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.051946827210485935,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0052280426025390625,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0052280426025390625,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005521226790733635,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.010318566113710403,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.88299580058083e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.88299580058083e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3880726844072342,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4717392474412918,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006946500740014017,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006946500740014017,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3699818626046181,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4505543038249016,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006622675224207342,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006622675224207342,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.34499986469745636,
"eval_signal/frontier_coverage_15/group_std_mean": 0.421297088265419,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0061754974303767085,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0061754974303767085,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.23970409855246544,
"eval_signal/frontier_coverage_20/group_std_mean": 0.2946352958679199,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0042907033348456025,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0042907033348456025,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.1518411710858345,
"eval_signal/frontier_coverage_25/group_std_mean": 0.19357599690556526,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027179569005966187,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027179569005966187,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3851188123226166,
"eval_signal/frontier_coverage_5/group_std_mean": 0.46821124851703644,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006893626414239407,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006893626414239407,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.008766131941229105,
"eval_signal/frontier_ece_reward/group_std_mean": 0.011146760312840343,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010957664926536381,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010957664926536381,
"eval_steps_per_second": 0.187,
"step": 300
},
{
"calibration/aurc": 0.24400723018511927,
"calibration/batch_distribution_entropy": 0.9473744851892254,
"calibration/buffer_distribution_entropy": 0.949229625921531,
"calibration/confidence_entropy": 0.45559131492766747,
"calibration/coverage@0%": 0.024609375,
"calibration/coverage@1%": 0.024609375,
"calibration/coverage@10%": 0.30390625,
"calibration/coverage@15%": 0.409375,
"calibration/coverage@20%": 0.489453125,
"calibration/coverage@25%": 0.55625,
"calibration/coverage@30%": 0.626171875,
"calibration/coverage@5%": 0.118359375,
"calibration/ece": 0.13617818668634782,
"calibration/mean_confidence": 0.5205770833619033,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 567.2,
"completions/max_terminated_length": 567.2,
"completions/mean_length": 222.3947265625,
"completions/mean_terminated_length": 222.52409973144532,
"completions/min_length": 64.4,
"completions/min_terminated_length": 108.0,
"epoch": 0.976,
"grad_norm": 0.000978239462710917,
"learning_rate": 1e-06,
"loss": -0.0006,
"num_tokens": 1033260322.0,
"reward": 1.012249755859375,
"reward_std": 0.06328966841101646,
"rewards/accuracy_reward": 0.55830078125,
"rewards/brier_reward": 0.8091387510299682,
"rewards/confidence_uniqueness_reward": 0.9552855730056763,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.0035191518254578114,
"rewards/frontier_coverage_1": 0.1328538328409195,
"rewards/frontier_coverage_10": 0.12540052868425847,
"rewards/frontier_coverage_15": 0.11588934063911438,
"rewards/frontier_coverage_20": 0.1025858499109745,
"rewards/frontier_coverage_25": 0.08929705321788788,
"rewards/frontier_coverage_5": 0.1300761725753546,
"rewards/frontier_ece_reward": 0.0035362728871405126,
"signal/accuracy_reward/centered_abs_mean": 0.085943603515625,
"signal/accuracy_reward/group_std_mean": 0.11529913991689682,
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0429718017578125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0429718017578125,
"signal/advantage_abs_mean": 0.04712223336100578,
"signal/advantage_pre_scale_abs_mean": 0.04712223336100578,
"signal/advantage_pre_scale_std": 0.09221926033496856,
"signal/advantage_std": 0.09221926033496856,
"signal/brier_reward/centered_abs_mean": 0.09592617601156235,
"signal/brier_reward/group_std_mean": 0.1255634605884552,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011990772001445294,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.011990772001445294,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019248013198375703,
"signal/confidence_uniqueness_reward/group_std_mean": 0.024623573198914527,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002406001649796963,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002406001649796963,
"signal/format_reward/centered_abs_mean": 0.00101318359375,
"signal/format_reward/group_std_mean": 0.0017052459996193647,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000506591796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000506591796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023960830876603723,
"signal/frontier_aurc_reward/group_std_mean": 0.00391978737898171,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.288988566258922e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.288988566258922e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14300832003355027,
"signal/frontier_coverage_1/group_std_mean": 0.18482083976268768,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002559848828241229,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002559848828241229,
"signal/frontier_coverage_10/centered_abs_mean": 0.1333800047636032,
"signal/frontier_coverage_10/group_std_mean": 0.17265520095825196,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023875020677223803,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023875020677223803,
"signal/frontier_coverage_15/centered_abs_mean": 0.11999707221984864,
"signal/frontier_coverage_15/group_std_mean": 0.15565804541110992,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002147947554476559,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002147947554476559,
"signal/frontier_coverage_20/centered_abs_mean": 0.0916026160120964,
"signal/frontier_coverage_20/group_std_mean": 0.11769088953733445,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001639686687849462,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001639686687849462,
"signal/frontier_coverage_25/centered_abs_mean": 0.06652972027659416,
"signal/frontier_coverage_25/group_std_mean": 0.0854031965136528,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011908818734809757,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011908818734809757,
"signal/frontier_coverage_5/centered_abs_mean": 0.14001160264015197,
"signal/frontier_coverage_5/group_std_mean": 0.18105824291706085,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025062075816094874,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025062075816094874,
"signal/frontier_ece_reward/centered_abs_mean": 0.005424194037914276,
"signal/frontier_ece_reward/group_std_mean": 0.0069260898977518085,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006780242547392845,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006780242547392845,
"step": 305
},
{
"calibration/aurc": 0.35092088130488575,
"calibration/batch_distribution_entropy": 0.9476465925477726,
"calibration/buffer_distribution_entropy": 0.9620677682967408,
"calibration/confidence_entropy": 0.43464164736881683,
"calibration/coverage@0%": 0.00703125,
"calibration/coverage@1%": 0.00703125,
"calibration/coverage@10%": 0.06917273116438356,
"calibration/coverage@15%": 0.13601317881604696,
"calibration/coverage@20%": 0.15203491927592955,
"calibration/coverage@25%": 0.29150715508806263,
"calibration/coverage@30%": 0.45168633806262226,
"calibration/coverage@5%": 0.020703125,
"calibration/ece": 0.14923292605787758,
"calibration/mean_confidence": 0.4584807254609391,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 460.4,
"completions/max_terminated_length": 460.4,
"completions/mean_length": 219.6595703125,
"completions/mean_terminated_length": 219.70317993164062,
"completions/min_length": 65.8,
"completions/min_terminated_length": 107.6,
"epoch": 0.992,
"grad_norm": 0.0008502269047312438,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 1050638116.0,
"reward": 0.9971673965454102,
"reward_std": 0.059844110161066055,
"rewards/accuracy_reward": 0.52744140625,
"rewards/brier_reward": 0.8077756524085998,
"rewards/confidence_uniqueness_reward": 0.951680326461792,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.00386391868814826,
"rewards/frontier_coverage_1": 0.15493436753749848,
"rewards/frontier_coverage_10": 0.14240696877241135,
"rewards/frontier_coverage_15": 0.12570713311433793,
"rewards/frontier_coverage_20": 0.09610196053981782,
"rewards/frontier_coverage_25": 0.07213738411664963,
"rewards/frontier_coverage_5": 0.15093083381652833,
"rewards/frontier_ece_reward": 0.0031664574285969137,
"signal/accuracy_reward/centered_abs_mean": 0.080609130859375,
"signal/accuracy_reward/group_std_mean": 0.10727933198213577,
"signal/accuracy_reward/group_zero_std_frac": 0.690625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0403045654296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0403045654296875,
"signal/advantage_abs_mean": 0.04485210329294205,
"signal/advantage_pre_scale_abs_mean": 0.04485210329294205,
"signal/advantage_pre_scale_std": 0.08939644247293473,
"signal/advantage_std": 0.08939644247293473,
"signal/brier_reward/centered_abs_mean": 0.09865953177213668,
"signal/brier_reward/group_std_mean": 0.12706311494112016,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012332441471517085,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012332441471517085,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020096756517887115,
"signal/confidence_uniqueness_reward/group_std_mean": 0.026034438982605934,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025120945647358894,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025120945647358894,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002714578388258815,
"signal/frontier_aurc_reward/group_std_mean": 0.004483289271593094,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.859095279243775e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.859095279243775e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14088730067014693,
"signal/frontier_coverage_1/group_std_mean": 0.1814073145389557,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025218826718628406,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025218826718628406,
"signal/frontier_coverage_10/centered_abs_mean": 0.12668565809726715,
"signal/frontier_coverage_10/group_std_mean": 0.16333172023296355,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002267673145979643,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002267673145979643,
"signal/frontier_coverage_15/centered_abs_mean": 0.11176651269197464,
"signal/frontier_coverage_15/group_std_mean": 0.1444163739681244,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020006204955279826,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020006204955279826,
"signal/frontier_coverage_20/centered_abs_mean": 0.08558071851730346,
"signal/frontier_coverage_20/group_std_mean": 0.11098945289850234,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001531894807703793,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001531894807703793,
"signal/frontier_coverage_25/centered_abs_mean": 0.05418485179543495,
"signal/frontier_coverage_25/group_std_mean": 0.0693924218416214,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009699088288471103,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009699088288471103,
"signal/frontier_coverage_5/centered_abs_mean": 0.13585978597402573,
"signal/frontier_coverage_5/group_std_mean": 0.17506172358989716,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002431890135630965,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002431890135630965,
"signal/frontier_ece_reward/centered_abs_mean": 0.004609546437859535,
"signal/frontier_ece_reward/group_std_mean": 0.0058650577440857886,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005761933047324419,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005761933047324419,
"step": 310
},
{
"calibration/aurc": 0.28217641775191643,
"calibration/batch_distribution_entropy": 0.9150021782212961,
"calibration/buffer_distribution_entropy": 0.9734440568203542,
"calibration/confidence_entropy": 0.43623738761584024,
"calibration/coverage@0%": 0.017578125,
"calibration/coverage@1%": 0.017578125,
"calibration/coverage@10%": 0.1083984375,
"calibration/coverage@15%": 0.1181640625,
"calibration/coverage@20%": 0.158203125,
"calibration/coverage@25%": 0.4775390625,
"calibration/coverage@30%": 0.6328125,
"calibration/coverage@5%": 0.029296875,
"calibration/ece": 0.15394213142850904,
"calibration/mean_confidence": 0.6062272876785091,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 468.5,
"completions/max_terminated_length": 468.5,
"completions/mean_length": 220.23912048339844,
"completions/mean_terminated_length": 220.23912048339844,
"completions/min_length": 104.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.9984,
"num_tokens": 1057543382.0,
"reward": 1.0044989585876465,
"reward_std": 0.0631661843508482,
"rewards/accuracy_reward": 0.556884765625,
"rewards/brier_reward": 0.7794812023639679,
"rewards/confidence_uniqueness_reward": 0.9576004147529602,
"rewards/format_reward": 0.999755859375,
"rewards/frontier_aurc_reward": -0.004474298446439207,
"rewards/frontier_coverage_1": 0.09485927224159241,
"rewards/frontier_coverage_10": 0.08385487273335457,
"rewards/frontier_coverage_15": 0.0779510848224163,
"rewards/frontier_coverage_20": 0.07361067458987236,
"rewards/frontier_coverage_25": 0.06920312717556953,
"rewards/frontier_coverage_5": 0.09014599770307541,
"rewards/frontier_ece_reward": 0.0028738640248775482,
"signal/accuracy_reward/centered_abs_mean": 0.0815582275390625,
"signal/accuracy_reward/group_std_mean": 0.10485902056097984,
"signal/accuracy_reward/group_zero_std_frac": 0.7109375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04077911376953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04077911376953125,
"signal/advantage_abs_mean": 0.04879816062748432,
"signal/advantage_pre_scale_abs_mean": 0.04879816062748432,
"signal/advantage_pre_scale_std": 0.09506529569625854,
"signal/advantage_std": 0.09506529569625854,
"signal/brier_reward/centered_abs_mean": 0.1057896763086319,
"signal/brier_reward/group_std_mean": 0.13482706993818283,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013223709538578987,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013223709538578987,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.017303465865552425,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02221918012946844,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002162933233194053,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002162933233194053,
"signal/format_reward/centered_abs_mean": 0.0004730224609375,
"signal/format_reward/group_std_mean": 0.0013810679083690047,
"signal/format_reward/group_zero_std_frac": 0.9921875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00023651123046875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00023651123046875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0034338270779699087,
"signal/frontier_aurc_reward/group_std_mean": 0.006090128095820546,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.14655018580379e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.14655018580379e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13515587896108627,
"signal/frontier_coverage_1/group_std_mean": 0.17220665514469147,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024192901328206062,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024192901328206062,
"signal/frontier_coverage_10/centered_abs_mean": 0.11604492366313934,
"signal/frontier_coverage_10/group_std_mean": 0.14831294119358063,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020772040588781238,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020772040588781238,
"signal/frontier_coverage_15/centered_abs_mean": 0.10193650424480438,
"signal/frontier_coverage_15/group_std_mean": 0.1305779367685318,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018246633699163795,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018246633699163795,
"signal/frontier_coverage_20/centered_abs_mean": 0.08051994815468788,
"signal/frontier_coverage_20/group_std_mean": 0.1036677211523056,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001441307074856013,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001441307074856013,
"signal/frontier_coverage_25/centered_abs_mean": 0.057430367916822433,
"signal/frontier_coverage_25/group_std_mean": 0.07381578162312508,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010280035203322768,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010280035203322768,
"signal/frontier_coverage_5/centered_abs_mean": 0.1258089244365692,
"signal/frontier_coverage_5/group_std_mean": 0.1605454459786415,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002251979778520763,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002251979778520763,
"signal/frontier_ece_reward/centered_abs_mean": 0.005368032958358526,
"signal/frontier_ece_reward/group_std_mean": 0.006995111936703324,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0390625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006710041197948158,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006710041197948158,
"step": 312,
"total_flos": 0.0,
"train_loss": 0.000220470251756174,
"train_runtime": 59186.0522,
"train_samples_per_second": 0.338,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 312,
"num_input_tokens_seen": 1057543382,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}