Files
RLCR-v4-ks-highcov-volume-h…/trainer_state.json
ModelHub XC 7dea28562c 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-highcov-volume-hotpot
Source: Original Platform
2026-04-10 17:45:58 +08:00

9532 lines
597 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 50,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.6429490567891648,
"calibration/batch_distribution_entropy": 0.6568061886012901,
"calibration/confidence_entropy": 0.346240177384674,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4945882363067394,
"calibration/mean_confidence": 0.7883452419087591,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.04150390625,
"completions/max_length": 1505.0,
"completions/max_terminated_length": 1505.0,
"completions/mean_length": 212.3419921875,
"completions/mean_terminated_length": 221.52340393066407,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.016,
"grad_norm": 0.06983423233032227,
"learning_rate": 3.1249999999999997e-07,
"loss": 0.0083,
"num_tokens": 17018414.0,
"reward": 0.7155525326728821,
"reward_std": 0.601375138759613,
"rewards/accuracy_reward": 0.2197265625,
"rewards/brier_reward": 0.37495601177215576,
"rewards/confidence_uniqueness_reward": 0.4819000899791718,
"rewards/format_reward": 0.67919921875,
"rewards/frontier_aurc_reward": 0.3020222902297974,
"rewards/frontier_ece_reward": 0.3020222902297974,
"rewards/frontier_entropy_batch_reward": -0.6498908281326294,
"rewards/volume_coverage_0": 0.3020222902297974,
"rewards/volume_coverage_1": 0.3020222902297974,
"rewards/volume_coverage_10": 0.3020222902297974,
"rewards/volume_coverage_15": 0.3020222902297974,
"rewards/volume_coverage_20": 0.3020222902297974,
"rewards/volume_coverage_25": 0.3020222902297974,
"rewards/volume_coverage_5": 0.3020222902297974,
"signal/accuracy_reward/centered_abs_mean": 0.2397705078125,
"signal/accuracy_reward/group_std_mean": 0.2794930338859558,
"signal/accuracy_reward/group_zero_std_frac": 0.33125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11988525390625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11988525390625,
"signal/advantage_abs_mean": 0.5115305304527282,
"signal/advantage_pre_scale_abs_mean": 0.5115305304527282,
"signal/advantage_pre_scale_std": 0.6212727189064026,
"signal/advantage_std": 0.6212727189064026,
"signal/brier_reward/centered_abs_mean": 0.31964564323425293,
"signal/brier_reward/group_std_mean": 0.36430342197418214,
"signal/brier_reward/group_zero_std_frac": 0.003125,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03196456506848335,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.03196456506848335,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.298951119184494,
"signal/confidence_uniqueness_reward/group_std_mean": 0.3491382360458374,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02989511229097843,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02989511229097843,
"signal/format_reward/centered_abs_mean": 0.407049560546875,
"signal/format_reward/group_std_mean": 0.45584299564361574,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2035247802734375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.2035247802734375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.2922212302684784,
"signal/frontier_aurc_reward/group_std_mean": 0.3429143726825714,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0036527654621750115,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0036527654621750115,
"signal/frontier_ece_reward/centered_abs_mean": 0.2922212302684784,
"signal/frontier_ece_reward/group_std_mean": 0.3429143726825714,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02922212369740009,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02922212369740009,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4286865532398224,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4737535834312439,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.042868655920028684,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.042868655920028684,
"signal/volume_coverage_0/centered_abs_mean": 0.2922212302684784,
"signal/volume_coverage_0/group_std_mean": 0.3429143726825714,
"signal/volume_coverage_0/group_zero_std_frac": 0.003125,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 0.02922212369740009,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 0.02922212369740009,
"signal/volume_coverage_1/centered_abs_mean": 0.2922212302684784,
"signal/volume_coverage_1/group_std_mean": 0.3429143726825714,
"signal/volume_coverage_1/group_zero_std_frac": 0.003125,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 0.02922212369740009,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 0.02922212369740009,
"signal/volume_coverage_10/centered_abs_mean": 0.2922212302684784,
"signal/volume_coverage_10/group_std_mean": 0.3429143726825714,
"signal/volume_coverage_10/group_zero_std_frac": 0.003125,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 0.02922212369740009,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 0.02922212369740009,
"signal/volume_coverage_15/centered_abs_mean": 0.2922212302684784,
"signal/volume_coverage_15/group_std_mean": 0.3429143726825714,
"signal/volume_coverage_15/group_zero_std_frac": 0.003125,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.02922212369740009,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 0.02922212369740009,
"signal/volume_coverage_20/centered_abs_mean": 0.2922212302684784,
"signal/volume_coverage_20/group_std_mean": 0.3429143726825714,
"signal/volume_coverage_20/group_zero_std_frac": 0.003125,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.02922212369740009,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.02922212369740009,
"signal/volume_coverage_25/centered_abs_mean": 0.2922212302684784,
"signal/volume_coverage_25/group_std_mean": 0.3429143726825714,
"signal/volume_coverage_25/group_zero_std_frac": 0.003125,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.02922212369740009,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.02922212369740009,
"signal/volume_coverage_5/centered_abs_mean": 0.2922212302684784,
"signal/volume_coverage_5/group_std_mean": 0.3429143726825714,
"signal/volume_coverage_5/group_zero_std_frac": 0.003125,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 0.02922212369740009,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 0.02922212369740009,
"step": 5
},
{
"calibration/aurc": 0.6774772714272642,
"calibration/batch_distribution_entropy": 0.6704348051300373,
"calibration/confidence_entropy": 0.3531415986791937,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5158973434001635,
"calibration/mean_confidence": 0.7822890515633215,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0384765625,
"completions/max_length": 1481.2,
"completions/max_terminated_length": 1481.2,
"completions/mean_length": 205.05126953125,
"completions/mean_terminated_length": 213.29146423339844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.032,
"grad_norm": 0.01851228065788746,
"learning_rate": 6.249999999999999e-07,
"loss": 0.0124,
"num_tokens": 34218491.0,
"reward": 0.7374850273132324,
"reward_std": 0.5725616097450257,
"rewards/accuracy_reward": 0.21396484375,
"rewards/brier_reward": 0.3848159670829773,
"rewards/confidence_uniqueness_reward": 0.5191182971000672,
"rewards/format_reward": 0.72490234375,
"rewards/frontier_aurc_reward": 0.3033848226070404,
"rewards/frontier_ece_reward": 0.3033848226070404,
"rewards/frontier_entropy_batch_reward": -0.6884217381477356,
"rewards/volume_coverage_0": 0.3033848226070404,
"rewards/volume_coverage_1": 0.3033848226070404,
"rewards/volume_coverage_10": 0.3033848226070404,
"rewards/volume_coverage_15": 0.3033848226070404,
"rewards/volume_coverage_20": 0.3033848226070404,
"rewards/volume_coverage_25": 0.3033848226070404,
"rewards/volume_coverage_5": 0.3033848226070404,
"signal/accuracy_reward/centered_abs_mean": 0.222235107421875,
"signal/accuracy_reward/group_std_mean": 0.26479236483573915,
"signal/accuracy_reward/group_zero_std_frac": 0.346875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1111175537109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1111175537109375,
"signal/advantage_abs_mean": 0.47654428482055666,
"signal/advantage_pre_scale_abs_mean": 0.47654428482055666,
"signal/advantage_pre_scale_std": 0.5928633451461792,
"signal/advantage_std": 0.5928633451461792,
"signal/brier_reward/centered_abs_mean": 0.30740639567375183,
"signal/brier_reward/group_std_mean": 0.35535589456558225,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030740641802549363,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.030740641802549363,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2774076223373413,
"signal/confidence_uniqueness_reward/group_std_mean": 0.3378679931163788,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027740763500332832,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.027740763500332832,
"signal/format_reward/centered_abs_mean": 0.369842529296875,
"signal/format_reward/group_std_mean": 0.43356016278266907,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1849212646484375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.1849212646484375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.28120753169059753,
"signal/frontier_aurc_reward/group_std_mean": 0.3347383916378021,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003515094378963113,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003515094378963113,
"signal/frontier_ece_reward/centered_abs_mean": 0.28120753169059753,
"signal/frontier_ece_reward/group_std_mean": 0.3347383916378021,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.028120755031704903,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.028120755031704903,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3998861491680145,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4569081485271454,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03998861610889435,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03998861610889435,
"signal/volume_coverage_0/centered_abs_mean": 0.28120753169059753,
"signal/volume_coverage_0/group_std_mean": 0.3347383916378021,
"signal/volume_coverage_0/group_zero_std_frac": 0.0,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 0.028120755031704903,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 0.028120755031704903,
"signal/volume_coverage_1/centered_abs_mean": 0.28120753169059753,
"signal/volume_coverage_1/group_std_mean": 0.3347383916378021,
"signal/volume_coverage_1/group_zero_std_frac": 0.0,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 0.028120755031704903,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 0.028120755031704903,
"signal/volume_coverage_10/centered_abs_mean": 0.28120753169059753,
"signal/volume_coverage_10/group_std_mean": 0.3347383916378021,
"signal/volume_coverage_10/group_zero_std_frac": 0.0,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 0.028120755031704903,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 0.028120755031704903,
"signal/volume_coverage_15/centered_abs_mean": 0.28120753169059753,
"signal/volume_coverage_15/group_std_mean": 0.3347383916378021,
"signal/volume_coverage_15/group_zero_std_frac": 0.0,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.028120755031704903,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 0.028120755031704903,
"signal/volume_coverage_20/centered_abs_mean": 0.28120753169059753,
"signal/volume_coverage_20/group_std_mean": 0.3347383916378021,
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.028120755031704903,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.028120755031704903,
"signal/volume_coverage_25/centered_abs_mean": 0.28120753169059753,
"signal/volume_coverage_25/group_std_mean": 0.3347383916378021,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.028120755031704903,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.028120755031704903,
"signal/volume_coverage_5/centered_abs_mean": 0.28120753169059753,
"signal/volume_coverage_5/group_std_mean": 0.3347383916378021,
"signal/volume_coverage_5/group_zero_std_frac": 0.0,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 0.028120755031704903,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 0.028120755031704903,
"step": 10
},
{
"calibration/aurc": 0.5685390614142158,
"calibration/batch_distribution_entropy": 0.6455145518487925,
"calibration/buffer_distribution_entropy": 0.670253051700831,
"calibration/confidence_entropy": 0.34757602436925944,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.45315780046924337,
"calibration/mean_confidence": 0.8027396171602854,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0193359375,
"completions/max_length": 1439.4,
"completions/max_terminated_length": 1439.4,
"completions/mean_length": 172.82841796875,
"completions/mean_terminated_length": 176.34844970703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 11.4,
"epoch": 0.048,
"grad_norm": 0.022836102172732353,
"learning_rate": 9.374999999999999e-07,
"loss": 0.0084,
"num_tokens": 51036990.0,
"reward": 0.865392017364502,
"reward_std": 0.44560941457748415,
"rewards/accuracy_reward": 0.28134765625,
"rewards/brier_reward": 0.49504044055938723,
"rewards/confidence_uniqueness_reward": 0.6459718704223633,
"rewards/format_reward": 0.8869140625,
"rewards/frontier_aurc_reward": 0.3092373930849135,
"rewards/frontier_ece_reward": 0.2995997928082943,
"rewards/frontier_entropy_batch_reward": -0.8416913747787476,
"rewards/volume_coverage_0": 0.3107194304991089,
"rewards/volume_coverage_1": 0.3107194304991089,
"rewards/volume_coverage_10": 0.3107194304991089,
"rewards/volume_coverage_15": 0.3107194304991089,
"rewards/volume_coverage_20": 0.3107194304991089,
"rewards/volume_coverage_25": 0.3107194331706008,
"rewards/volume_coverage_5": 0.3107194304991089,
"signal/accuracy_reward/centered_abs_mean": 0.197674560546875,
"signal/accuracy_reward/group_std_mean": 0.2459003061056137,
"signal/accuracy_reward/group_zero_std_frac": 0.35625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0988372802734375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0988372802734375,
"signal/advantage_abs_mean": 0.3519722521305084,
"signal/advantage_pre_scale_abs_mean": 0.3519722521305084,
"signal/advantage_pre_scale_std": 0.4639441788196564,
"signal/advantage_std": 0.4639441788196564,
"signal/brier_reward/centered_abs_mean": 0.2721615880727768,
"signal/brier_reward/group_std_mean": 0.3270188093185425,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027216159179806708,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.027216159179806708,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.19719513058662413,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2604415327310562,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01971951425075531,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01971951425075531,
"signal/format_reward/centered_abs_mean": 0.18668212890625,
"signal/format_reward/group_std_mean": 0.2880357503890991,
"signal/format_reward/group_zero_std_frac": 0.071875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.093341064453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.093341064453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.21899721091613172,
"signal/frontier_aurc_reward/group_std_mean": 0.26340931504964826,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0027374652767321096,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0027374652767321096,
"signal/frontier_ece_reward/centered_abs_mean": 0.24538690745830535,
"signal/frontier_ece_reward/group_std_mean": 0.2943758606910706,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.024538691714406015,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.024538691714406015,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2529719710350037,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35891305804252627,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.021875,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025297198072075845,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025297198072075845,
"signal/volume_coverage_0/centered_abs_mean": 0.21790143859763106,
"signal/volume_coverage_0/group_std_mean": 0.261889320824187,
"signal/volume_coverage_0/group_zero_std_frac": 0.2,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 0.021790144977350195,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 0.021790144977350195,
"signal/volume_coverage_1/centered_abs_mean": 0.21790143859763106,
"signal/volume_coverage_1/group_std_mean": 0.261889320824187,
"signal/volume_coverage_1/group_zero_std_frac": 0.2,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 0.021790144977350195,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 0.021790144977350195,
"signal/volume_coverage_10/centered_abs_mean": 0.21790143859763106,
"signal/volume_coverage_10/group_std_mean": 0.261889320824187,
"signal/volume_coverage_10/group_zero_std_frac": 0.2,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 0.021790144977350195,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 0.021790144977350195,
"signal/volume_coverage_15/centered_abs_mean": 0.21790143859763106,
"signal/volume_coverage_15/group_std_mean": 0.261889320824187,
"signal/volume_coverage_15/group_zero_std_frac": 0.2,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.021790144977350195,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 0.021790144977350195,
"signal/volume_coverage_20/centered_abs_mean": 0.21790143859763106,
"signal/volume_coverage_20/group_std_mean": 0.261889320824187,
"signal/volume_coverage_20/group_zero_std_frac": 0.2,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.021790144977350195,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.021790144977350195,
"signal/volume_coverage_25/centered_abs_mean": 0.21790144193124733,
"signal/volume_coverage_25/group_std_mean": 0.26188932565360795,
"signal/volume_coverage_25/group_zero_std_frac": 0.15,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.02179014531071184,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.02179014531071184,
"signal/volume_coverage_5/centered_abs_mean": 0.21790143859763106,
"signal/volume_coverage_5/group_std_mean": 0.261889320824187,
"signal/volume_coverage_5/group_zero_std_frac": 0.2,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 0.021790144977350195,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 0.021790144977350195,
"step": 15
},
{
"calibration/aurc": 0.5202567853912221,
"calibration/batch_distribution_entropy": 0.7380507805905504,
"calibration/buffer_distribution_entropy": 0.6726920946793268,
"calibration/confidence_entropy": 0.3816252415285025,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.351275651830115,
"calibration/mean_confidence": 0.7499806626748107,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00556640625,
"completions/max_length": 1163.2,
"completions/max_terminated_length": 1163.2,
"completions/mean_length": 130.587109375,
"completions/mean_terminated_length": 131.34015197753905,
"completions/min_length": 0.0,
"completions/min_terminated_length": 22.4,
"epoch": 0.064,
"grad_norm": 0.004004279151558876,
"learning_rate": 1e-06,
"loss": -0.0028,
"num_tokens": 67292602.0,
"reward": 0.7010321021080017,
"reward_std": 0.19609815776348113,
"rewards/accuracy_reward": 0.344140625,
"rewards/brier_reward": 0.5915492057800293,
"rewards/confidence_uniqueness_reward": 0.7575255513191224,
"rewards/format_reward": 0.9775390625,
"rewards/frontier_aurc_reward": -0.00692891301587224,
"rewards/frontier_ece_reward": -0.042981109907850625,
"rewards/frontier_entropy_batch_reward": -0.9033052682876587,
"rewards/volume_coverage_0": 2.1003434658162236e-09,
"rewards/volume_coverage_1": 2.1003434658162236e-09,
"rewards/volume_coverage_10": 2.1003434658162236e-09,
"rewards/volume_coverage_15": 4.33701768831618e-08,
"rewards/volume_coverage_20": 5.88921527877595e-08,
"rewards/volume_coverage_25": 1.2825582353936938e-07,
"rewards/volume_coverage_5": 2.1003434658162236e-09,
"signal/accuracy_reward/centered_abs_mean": 0.19951171875,
"signal/accuracy_reward/group_std_mean": 0.2516826242208481,
"signal/accuracy_reward/group_zero_std_frac": 0.340625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.099755859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.099755859375,
"signal/advantage_abs_mean": 0.1468222200870514,
"signal/advantage_pre_scale_abs_mean": 0.1468222200870514,
"signal/advantage_pre_scale_std": 0.21374104022979737,
"signal/advantage_std": 0.21374104022979737,
"signal/brier_reward/centered_abs_mean": 0.2464316189289093,
"signal/brier_reward/group_std_mean": 0.30184549689292905,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02464316114783287,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02464316114783287,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.12081145346164704,
"signal/confidence_uniqueness_reward/group_std_mean": 0.15997391939163208,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01208114568144083,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01208114568144083,
"signal/format_reward/centered_abs_mean": 0.04210205078125,
"signal/format_reward/group_std_mean": 0.09824754893779755,
"signal/format_reward/group_zero_std_frac": 0.53125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.021051025390625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.021051025390625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004936764482408762,
"signal/frontier_aurc_reward/group_std_mean": 0.006853995472192764,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.17095582128968e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.17095582128968e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.13821439445018768,
"signal/frontier_ece_reward/group_std_mean": 0.16570349037647247,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013821440003812312,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013821440003812312,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1694903701543808,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3000731647014618,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.016949037089943886,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.016949037089943886,
"signal/volume_coverage_0/centered_abs_mean": 3.4365328471785974e-09,
"signal/volume_coverage_0/group_std_mean": 5.13227842446895e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.903125,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 3.4365328083207915e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 3.4365328083207915e-10,
"signal/volume_coverage_1/centered_abs_mean": 3.4365328471785974e-09,
"signal/volume_coverage_1/group_std_mean": 5.13227842446895e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.903125,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 3.4365328083207915e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 3.4365328083207915e-10,
"signal/volume_coverage_10/centered_abs_mean": 3.4365328471785974e-09,
"signal/volume_coverage_10/group_std_mean": 5.13227842446895e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.903125,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.4365328083207915e-10,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 3.4365328083207915e-10,
"signal/volume_coverage_15/centered_abs_mean": 7.97853836442819e-08,
"signal/volume_coverage_15/group_std_mean": 1.1836238478224459e-07,
"signal/volume_coverage_15/group_zero_std_frac": 0.903125,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 7.978538937858381e-09,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 7.978538937858381e-09,
"signal/volume_coverage_20/centered_abs_mean": 1.0703116398724788e-07,
"signal/volume_coverage_20/group_std_mean": 1.584521130748584e-07,
"signal/volume_coverage_20/group_zero_std_frac": 0.803125,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.0703117145349773e-08,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.0703117145349773e-08,
"signal/volume_coverage_25/centered_abs_mean": 2.0704597146758986e-07,
"signal/volume_coverage_25/group_std_mean": 3.073400936637327e-07,
"signal/volume_coverage_25/group_zero_std_frac": 0.7,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.0704598141518814e-08,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 2.0704598141518814e-08,
"signal/volume_coverage_5/centered_abs_mean": 3.4365328471785974e-09,
"signal/volume_coverage_5/group_std_mean": 5.13227842446895e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.903125,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 3.4365328083207915e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 3.4365328083207915e-10,
"step": 20
},
{
"calibration/aurc": 0.637877780884015,
"calibration/batch_distribution_entropy": 0.8666604687716237,
"calibration/buffer_distribution_entropy": 0.7172808030818212,
"calibration/confidence_entropy": 0.4574993969776974,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.3524438351727639,
"calibration/mean_confidence": 0.650396233605561,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0021484375,
"completions/max_length": 983.2,
"completions/max_terminated_length": 983.2,
"completions/mean_length": 112.5263671875,
"completions/mean_terminated_length": 112.76943817138672,
"completions/min_length": 0.0,
"completions/min_terminated_length": 35.4,
"epoch": 0.08,
"grad_norm": 0.00510385213419795,
"learning_rate": 1e-06,
"loss": -0.0031,
"num_tokens": 83378024.0,
"reward": 0.7345107555389404,
"reward_std": 0.17090575098991395,
"rewards/accuracy_reward": 0.35263671875,
"rewards/brier_reward": 0.6509084701538086,
"rewards/confidence_uniqueness_reward": 0.8421475172042847,
"rewards/format_reward": 0.99287109375,
"rewards/frontier_aurc_reward": -0.005880103260278702,
"rewards/frontier_ece_reward": -0.03344872035086155,
"rewards/frontier_entropy_batch_reward": -0.841304075717926,
"rewards/volume_coverage_0": 1.3084958672138214e-10,
"rewards/volume_coverage_1": 1.3084958672138214e-10,
"rewards/volume_coverage_10": 3.28643133817863e-09,
"rewards/volume_coverage_15": 3.28643133817863e-09,
"rewards/volume_coverage_20": 1.2797271364828333e-08,
"rewards/volume_coverage_25": 2.003043941045668e-07,
"rewards/volume_coverage_5": 1.3084958672138214e-10,
"signal/accuracy_reward/centered_abs_mean": 0.191363525390625,
"signal/accuracy_reward/group_std_mean": 0.23868935704231262,
"signal/accuracy_reward/group_zero_std_frac": 0.378125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0956817626953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0956817626953125,
"signal/advantage_abs_mean": 0.13158616423606873,
"signal/advantage_pre_scale_abs_mean": 0.13158616423606873,
"signal/advantage_pre_scale_std": 0.1908570796251297,
"signal/advantage_std": 0.1908570796251297,
"signal/brier_reward/centered_abs_mean": 0.23046765923500062,
"signal/brier_reward/group_std_mean": 0.28209164142608645,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023046765848994254,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.023046765848994254,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07654989808797837,
"signal/confidence_uniqueness_reward/group_std_mean": 0.10619462579488755,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007654989883303642,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007654989883303642,
"signal/format_reward/centered_abs_mean": 0.013677978515625,
"signal/format_reward/group_std_mean": 0.03662779070436954,
"signal/format_reward/group_zero_std_frac": 0.80625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0068389892578125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0068389892578125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003424457693472505,
"signal/frontier_aurc_reward/group_std_mean": 0.005068050231784582,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2805721750482914e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2805721750482914e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.11674794852733612,
"signal/frontier_ece_reward/group_std_mean": 0.1464279443025589,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011674795113503934,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011674795113503934,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2606357991695404,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.399100261926651,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.021875,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02606358118355274,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02606358118355274,
"signal/volume_coverage_0/centered_abs_mean": 3.6999710295582134e-09,
"signal/volume_coverage_0/group_std_mean": 5.391393764764274e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.909375,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 3.699970829718069e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 3.699970829718069e-10,
"signal/volume_coverage_1/centered_abs_mean": 3.6999710295582134e-09,
"signal/volume_coverage_1/group_std_mean": 5.391393764764274e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.909375,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 3.699970829718069e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 3.699970829718069e-10,
"signal/volume_coverage_10/centered_abs_mean": 1.4863867259240492e-08,
"signal/volume_coverage_10/group_std_mean": 2.1319809062614327e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.859375,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.4863867053849233e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.4863867053849233e-09,
"signal/volume_coverage_15/centered_abs_mean": 1.4863867259240492e-08,
"signal/volume_coverage_15/group_std_mean": 2.1319809062614327e-08,
"signal/volume_coverage_15/group_zero_std_frac": 0.859375,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.4863867053849233e-09,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.4863867053849233e-09,
"signal/volume_coverage_20/centered_abs_mean": 4.194976913618476e-08,
"signal/volume_coverage_20/group_std_mean": 6.138168572311642e-08,
"signal/volume_coverage_20/group_zero_std_frac": 0.809375,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 4.1949774792771065e-09,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 4.1949774792771065e-09,
"signal/volume_coverage_25/centered_abs_mean": 3.951772380994534e-07,
"signal/volume_coverage_25/group_std_mean": 5.592273702115236e-07,
"signal/volume_coverage_25/group_zero_std_frac": 0.68125,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.951772487575944e-08,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 3.951772487575944e-08,
"signal/volume_coverage_5/centered_abs_mean": 3.6999710295582134e-09,
"signal/volume_coverage_5/group_std_mean": 5.391393764764274e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.909375,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 3.699970829718069e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 3.699970829718069e-10,
"step": 25
},
{
"calibration/aurc": 0.6324060218490095,
"calibration/batch_distribution_entropy": 0.9494056926566277,
"calibration/buffer_distribution_entropy": 0.7762817425645073,
"calibration/confidence_entropy": 0.5202123272314586,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.27552858640012845,
"calibration/mean_confidence": 0.5462257194993483,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00185546875,
"completions/max_length": 667.0,
"completions/max_terminated_length": 667.0,
"completions/mean_length": 106.41845703125,
"completions/mean_terminated_length": 106.6145523071289,
"completions/min_length": 0.0,
"completions/min_terminated_length": 33.8,
"epoch": 0.096,
"grad_norm": 0.004249365534633398,
"learning_rate": 1e-06,
"loss": -0.0011,
"num_tokens": 99512357.0,
"reward": 0.7657663106918335,
"reward_std": 0.16878970265388488,
"rewards/accuracy_reward": 0.35322265625,
"rewards/brier_reward": 0.6870332479476928,
"rewards/confidence_uniqueness_reward": 0.9030344367027283,
"rewards/format_reward": 0.994140625,
"rewards/frontier_aurc_reward": -0.005292004905641079,
"rewards/frontier_ece_reward": -0.026083091273903847,
"rewards/frontier_entropy_batch_reward": -0.6424768328666687,
"rewards/volume_coverage_0": 1.1482879369584253e-09,
"rewards/volume_coverage_1": 1.1482879369584253e-09,
"rewards/volume_coverage_10": 1.1152872342323761e-08,
"rewards/volume_coverage_15": 7.12950278647373e-08,
"rewards/volume_coverage_20": 9.590478232124333e-08,
"rewards/volume_coverage_25": 1.634254282123493e-07,
"rewards/volume_coverage_5": 1.1482879369584253e-09,
"signal/accuracy_reward/centered_abs_mean": 0.191375732421875,
"signal/accuracy_reward/group_std_mean": 0.24138614535331726,
"signal/accuracy_reward/group_zero_std_frac": 0.3625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0956878662109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0956878662109375,
"signal/advantage_abs_mean": 0.12984309047460557,
"signal/advantage_pre_scale_abs_mean": 0.12984309047460557,
"signal/advantage_pre_scale_std": 0.18535825312137605,
"signal/advantage_std": 0.18535825312137605,
"signal/brier_reward/centered_abs_mean": 0.2213761627674103,
"signal/brier_reward/group_std_mean": 0.2727460443973541,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02213761620223522,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02213761620223522,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0604395791888237,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08873669505119323,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0060439580120146275,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0060439580120146275,
"signal/format_reward/centered_abs_mean": 0.01134033203125,
"signal/format_reward/group_std_mean": 0.03280932120978832,
"signal/format_reward/group_zero_std_frac": 0.815625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005670166015625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005670166015625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026173558086156844,
"signal/frontier_aurc_reward/group_std_mean": 0.004071610467508435,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.271694804425351e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.271694804425351e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.09653272926807403,
"signal/frontier_ece_reward/group_std_mean": 0.1304735615849495,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009653273224830627,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009653273224830627,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4387050747871399,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5275961101055145,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.043870508670806885,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.043870508670806885,
"signal/volume_coverage_0/centered_abs_mean": 2.9389548905633945e-09,
"signal/volume_coverage_0/group_std_mean": 3.83164269202041e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.875,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.938955051545733e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 2.938955051545733e-10,
"signal/volume_coverage_1/centered_abs_mean": 2.9389548905633945e-09,
"signal/volume_coverage_1/group_std_mean": 3.83164269202041e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.875,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.938955051545733e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 2.938955051545733e-10,
"signal/volume_coverage_10/centered_abs_mean": 2.317042134825087e-08,
"signal/volume_coverage_10/group_std_mean": 3.1277936196616454e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.784375,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.317042160915328e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.317042160915328e-09,
"signal/volume_coverage_15/centered_abs_mean": 8.99444128843463e-08,
"signal/volume_coverage_15/group_std_mean": 1.1935079364810973e-07,
"signal/volume_coverage_15/group_zero_std_frac": 0.734375,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 8.994441605403302e-09,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 8.994441605403302e-09,
"signal/volume_coverage_20/centered_abs_mean": 1.1986816018660916e-07,
"signal/volume_coverage_20/group_std_mean": 1.592895981783471e-07,
"signal/volume_coverage_20/group_zero_std_frac": 0.684375,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.1986816453313231e-08,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.1986816453313231e-08,
"signal/volume_coverage_25/centered_abs_mean": 2.4859488831907586e-07,
"signal/volume_coverage_25/group_std_mean": 3.3005767701155265e-07,
"signal/volume_coverage_25/group_zero_std_frac": 0.534375,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.485949108788077e-08,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 2.485949108788077e-08,
"signal/volume_coverage_5/centered_abs_mean": 2.9389548905633945e-09,
"signal/volume_coverage_5/group_std_mean": 3.83164269202041e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.875,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.938955051545733e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.938955051545733e-10,
"step": 30
},
{
"calibration/aurc": 0.5107678081003135,
"calibration/batch_distribution_entropy": 0.9589194598555851,
"calibration/buffer_distribution_entropy": 0.8460931740982854,
"calibration/confidence_entropy": 0.5240642540622189,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.003137254901960784,
"calibration/coverage@20%": 0.00392156862745098,
"calibration/coverage@25%": 0.022745098039215685,
"calibration/coverage@30%": 0.02627450980392157,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.17553510974376438,
"calibration/mean_confidence": 0.42006594777913764,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00302734375,
"completions/max_length": 603.0,
"completions/max_terminated_length": 603.0,
"completions/mean_length": 100.521484375,
"completions/mean_terminated_length": 100.82618560791016,
"completions/min_length": 0.0,
"completions/min_terminated_length": 36.4,
"epoch": 0.112,
"grad_norm": 0.009812681004405022,
"learning_rate": 1e-06,
"loss": -0.0036,
"num_tokens": 115651169.0,
"reward": 0.8234237790107727,
"reward_std": 0.15335985720157624,
"rewards/accuracy_reward": 0.3880859375,
"rewards/brier_reward": 0.7183116436004638,
"rewards/confidence_uniqueness_reward": 0.9424231290817261,
"rewards/format_reward": 0.99404296875,
"rewards/frontier_aurc_reward": -0.004633870534598827,
"rewards/frontier_ece_reward": -0.005527885630726814,
"rewards/frontier_entropy_batch_reward": -0.3310348570346832,
"rewards/volume_coverage_0": 7.422226350195515e-09,
"rewards/volume_coverage_1": 7.422226350195515e-09,
"rewards/volume_coverage_10": 1.2741683225181433e-08,
"rewards/volume_coverage_15": 2.8442976557352395e-08,
"rewards/volume_coverage_20": 3.798906951790748e-08,
"rewards/volume_coverage_25": 8.618370428981059e-08,
"rewards/volume_coverage_5": 7.422226350195515e-09,
"signal/accuracy_reward/centered_abs_mean": 0.1885009765625,
"signal/accuracy_reward/group_std_mean": 0.23862674236297607,
"signal/accuracy_reward/group_zero_std_frac": 0.3625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09425048828125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09425048828125,
"signal/advantage_abs_mean": 0.11734064370393753,
"signal/advantage_pre_scale_abs_mean": 0.11734064370393753,
"signal/advantage_pre_scale_std": 0.17148884534835815,
"signal/advantage_std": 0.17148884534835815,
"signal/brier_reward/centered_abs_mean": 0.20886878669261932,
"signal/brier_reward/group_std_mean": 0.25930328369140626,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020886879414319992,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.020886879414319992,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031046908348798752,
"signal/confidence_uniqueness_reward/group_std_mean": 0.055822306871414186,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031046907417476175,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031046907417476175,
"signal/format_reward/centered_abs_mean": 0.011505126953125,
"signal/format_reward/group_std_mean": 0.03268913105130196,
"signal/format_reward/group_zero_std_frac": 0.81875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0057525634765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0057525634765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017553700832650065,
"signal/frontier_aurc_reward/group_std_mean": 0.002825619326904416,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.194212611357216e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.194212611357216e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.06550199910998344,
"signal/frontier_ece_reward/group_std_mean": 0.09871184825897217,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006550200004130602,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006550200004130602,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.41493695974349976,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4780768632888794,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.041493697464466094,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.041493697464466094,
"signal/volume_coverage_0/centered_abs_mean": 1.0677126127678349e-08,
"signal/volume_coverage_0/group_std_mean": 1.309429287221775e-08,
"signal/volume_coverage_0/group_zero_std_frac": 0.85,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.0677125621139093e-09,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.0677125621139093e-09,
"signal/volume_coverage_1/centered_abs_mean": 1.0677126127678349e-08,
"signal/volume_coverage_1/group_std_mean": 1.309429287221775e-08,
"signal/volume_coverage_1/group_zero_std_frac": 0.85,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.0677125621139093e-09,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.0677125621139093e-09,
"signal/volume_coverage_10/centered_abs_mean": 1.7244384153958236e-08,
"signal/volume_coverage_10/group_std_mean": 2.1109557413190318e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.85,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.7244385246140138e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.7244385246140138e-09,
"signal/volume_coverage_15/centered_abs_mean": 4.001330921044044e-08,
"signal/volume_coverage_15/group_std_mean": 4.904761328194951e-08,
"signal/volume_coverage_15/group_zero_std_frac": 0.846875,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 4.001331171260558e-09,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 4.001331171260558e-09,
"signal/volume_coverage_20/centered_abs_mean": 6.514198828133643e-08,
"signal/volume_coverage_20/group_std_mean": 8.038617682615268e-08,
"signal/volume_coverage_20/group_zero_std_frac": 0.746875,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 6.514199287627198e-09,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 6.514199287627198e-09,
"signal/volume_coverage_25/centered_abs_mean": 1.9327816443981405e-07,
"signal/volume_coverage_25/group_std_mean": 2.4028904537232166e-07,
"signal/volume_coverage_25/group_zero_std_frac": 0.746875,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.9327816443842628e-08,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.9327816443842628e-08,
"signal/volume_coverage_5/centered_abs_mean": 1.0677126127678349e-08,
"signal/volume_coverage_5/group_std_mean": 1.309429287221775e-08,
"signal/volume_coverage_5/group_zero_std_frac": 0.85,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.0677125621139093e-09,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.0677125621139093e-09,
"step": 35
},
{
"calibration/aurc": 0.5580233719668242,
"calibration/batch_distribution_entropy": 0.9514386602270696,
"calibration/buffer_distribution_entropy": 0.9009901987870197,
"calibration/confidence_entropy": 0.5205907217133923,
"calibration/coverage@0%": 0.0019700600666023465,
"calibration/coverage@1%": 0.0019700600666023465,
"calibration/coverage@10%": 0.0019700600666023465,
"calibration/coverage@15%": 0.0019700600666023465,
"calibration/coverage@20%": 0.01103295882481093,
"calibration/coverage@25%": 0.023240795493448697,
"calibration/coverage@30%": 0.03229591360368491,
"calibration/coverage@5%": 0.0019700600666023465,
"calibration/ece": 0.18875669542512402,
"calibration/mean_confidence": 0.39032947552856273,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00224609375,
"completions/max_length": 603.8,
"completions/max_terminated_length": 603.8,
"completions/mean_length": 104.0462890625,
"completions/mean_terminated_length": 104.28194122314453,
"completions/min_length": 0.0,
"completions/min_terminated_length": 41.0,
"epoch": 0.128,
"grad_norm": 0.007568053435534239,
"learning_rate": 1e-06,
"loss": -0.0021,
"num_tokens": 131633275.0,
"reward": 0.8242883682250977,
"reward_std": 0.13872416019439698,
"rewards/accuracy_reward": 0.3837890625,
"rewards/brier_reward": 0.7208372712135315,
"rewards/confidence_uniqueness_reward": 0.9432681918144226,
"rewards/format_reward": 0.99482421875,
"rewards/frontier_aurc_reward": -0.004502659384161234,
"rewards/frontier_ece_reward": -0.0009349806932732463,
"rewards/frontier_entropy_batch_reward": -0.3127904772758484,
"rewards/volume_coverage_0": 3.823643576761349e-09,
"rewards/volume_coverage_1": 3.823643576761349e-09,
"rewards/volume_coverage_10": 2.2619500181231268e-08,
"rewards/volume_coverage_15": 4.996604976159613e-08,
"rewards/volume_coverage_20": 5.745271669244101e-08,
"rewards/volume_coverage_25": 7.44545543190922e-08,
"rewards/volume_coverage_5": 3.823643576761349e-09,
"signal/accuracy_reward/centered_abs_mean": 0.16480712890625,
"signal/accuracy_reward/group_std_mean": 0.21133655607700347,
"signal/accuracy_reward/group_zero_std_frac": 0.4125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.082403564453125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.082403564453125,
"signal/advantage_abs_mean": 0.10589775294065476,
"signal/advantage_pre_scale_abs_mean": 0.10589775294065476,
"signal/advantage_pre_scale_std": 0.15886488556861877,
"signal/advantage_std": 0.15886488556861877,
"signal/brier_reward/centered_abs_mean": 0.20160384476184845,
"signal/brier_reward/group_std_mean": 0.25164816081523894,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020160384848713873,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.020160384848713873,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024899404495954514,
"signal/confidence_uniqueness_reward/group_std_mean": 0.046268679201602936,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024899405427277086,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024899405427277086,
"signal/format_reward/centered_abs_mean": 0.010015869140625,
"signal/format_reward/group_std_mean": 0.028942330926656722,
"signal/format_reward/group_zero_std_frac": 0.8375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0050079345703125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0050079345703125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001641789567656815,
"signal/frontier_aurc_reward/group_std_mean": 0.0024957799818366767,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.052236923191231e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.052236923191231e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.05462209209799766,
"signal/frontier_ece_reward/group_std_mean": 0.08531963378190995,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005462209228426218,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005462209228426218,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3865876078605652,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.45401414632797243,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.038658761978149415,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038658761978149415,
"signal/volume_coverage_0/centered_abs_mean": 5.4679469368990045e-09,
"signal/volume_coverage_0/group_std_mean": 6.669823321425738e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.8875,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.467947296333708e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 5.467947296333708e-10,
"signal/volume_coverage_1/centered_abs_mean": 5.4679469368990045e-09,
"signal/volume_coverage_1/group_std_mean": 6.669823321425738e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.8875,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.467947296333708e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 5.467947296333708e-10,
"signal/volume_coverage_10/centered_abs_mean": 2.886184602146624e-08,
"signal/volume_coverage_10/group_std_mean": 3.515927660679985e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.8375,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.886184567035821e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.886184567035821e-09,
"signal/volume_coverage_15/centered_abs_mean": 6.196580612793934e-08,
"signal/volume_coverage_15/group_std_mean": 7.554549857347049e-08,
"signal/volume_coverage_15/group_zero_std_frac": 0.7875,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 6.196581029543902e-09,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 6.196581029543902e-09,
"signal/volume_coverage_20/centered_abs_mean": 7.125139318275853e-08,
"signal/volume_coverage_20/group_std_mean": 8.68546331800335e-08,
"signal/volume_coverage_20/group_zero_std_frac": 0.7875,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 7.125139948188642e-09,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 7.125139948188642e-09,
"signal/volume_coverage_25/centered_abs_mean": 9.226549457508338e-08,
"signal/volume_coverage_25/group_std_mean": 1.124894589976666e-07,
"signal/volume_coverage_25/group_zero_std_frac": 0.7,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 9.226549527868722e-09,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 9.226549527868722e-09,
"signal/volume_coverage_5/centered_abs_mean": 5.4679469368990045e-09,
"signal/volume_coverage_5/group_std_mean": 6.669823321425738e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.8875,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 5.467947296333708e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 5.467947296333708e-10,
"step": 40
},
{
"calibration/aurc": 0.45307609323828046,
"calibration/batch_distribution_entropy": 0.9821801695393473,
"calibration/buffer_distribution_entropy": 0.9315210180147012,
"calibration/confidence_entropy": 0.5304064561224848,
"calibration/coverage@0%": 0.0007820144324853229,
"calibration/coverage@1%": 0.0007820144324853229,
"calibration/coverage@10%": 0.0007820144324853229,
"calibration/coverage@15%": 0.0007820144324853229,
"calibration/coverage@20%": 0.053125764432485324,
"calibration/coverage@25%": 0.08515701443248533,
"calibration/coverage@30%": 0.1871101394324853,
"calibration/coverage@5%": 0.0007820144324853229,
"calibration/ece": 0.2287805560797294,
"calibration/mean_confidence": 0.4682809404281139,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0013671875,
"completions/max_length": 609.6,
"completions/max_terminated_length": 609.6,
"completions/mean_length": 105.6619140625,
"completions/mean_terminated_length": 105.80623168945313,
"completions/min_length": 0.0,
"completions/min_terminated_length": 41.2,
"epoch": 0.144,
"grad_norm": 0.0022722198627889156,
"learning_rate": 1e-06,
"loss": -0.0007,
"num_tokens": 147665685.0,
"reward": 0.8855258345603942,
"reward_std": 0.13922611474990845,
"rewards/accuracy_reward": 0.484765625,
"rewards/brier_reward": 0.6958995342254639,
"rewards/confidence_uniqueness_reward": 0.9512767195701599,
"rewards/format_reward": 0.99755859375,
"rewards/frontier_aurc_reward": -0.00416339784860611,
"rewards/frontier_ece_reward": 0.001959370821714401,
"rewards/frontier_entropy_batch_reward": -0.20497798323631286,
"rewards/volume_coverage_0": 8.200039328110087e-11,
"rewards/volume_coverage_1": 8.200039328110087e-11,
"rewards/volume_coverage_10": -5.721240280143203e-10,
"rewards/volume_coverage_15": -5.721240280143203e-10,
"rewards/volume_coverage_20": -4.1206314610464643e-10,
"rewards/volume_coverage_25": -5.415158502164452e-10,
"rewards/volume_coverage_5": 8.200039328110087e-11,
"signal/accuracy_reward/centered_abs_mean": 0.17095947265625,
"signal/accuracy_reward/group_std_mean": 0.22554005682468414,
"signal/accuracy_reward/group_zero_std_frac": 0.35625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.085479736328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.085479736328125,
"signal/advantage_abs_mean": 0.10604511946439743,
"signal/advantage_pre_scale_abs_mean": 0.10604511946439743,
"signal/advantage_pre_scale_std": 0.15817178189754486,
"signal/advantage_std": 0.15817178189754486,
"signal/brier_reward/centered_abs_mean": 0.21632620096206664,
"signal/brier_reward/group_std_mean": 0.263543963432312,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021632620692253114,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.021632620692253114,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.017246781662106515,
"signal/confidence_uniqueness_reward/group_std_mean": 0.028721674531698226,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0017246782314032315,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017246782314032315,
"signal/format_reward/centered_abs_mean": 0.004718017578125,
"signal/format_reward/group_std_mean": 0.01347437030635774,
"signal/format_reward/group_zero_std_frac": 0.925,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0023590087890625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0023590087890625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021471860352903606,
"signal/frontier_aurc_reward/group_std_mean": 0.003108612261712551,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6839824204216712e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6839824204216712e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.06187872663140297,
"signal/frontier_ece_reward/group_std_mean": 0.0878511056303978,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006187872681766749,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006187872681766749,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.293925142288208,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37315127849578855,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0293925154954195,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0293925154954195,
"signal/volume_coverage_0/centered_abs_mean": 6.908986494025271e-10,
"signal/volume_coverage_0/group_std_mean": 8.69764738009593e-10,
"signal/volume_coverage_0/group_zero_std_frac": 0.9875,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.908987236486918e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 6.908987236486918e-11,
"signal/volume_coverage_1/centered_abs_mean": 6.908986494025271e-10,
"signal/volume_coverage_1/group_std_mean": 8.69764738009593e-10,
"signal/volume_coverage_1/group_zero_std_frac": 0.9875,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.908987236486918e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 6.908987236486918e-11,
"signal/volume_coverage_10/centered_abs_mean": 5.539405401044917e-09,
"signal/volume_coverage_10/group_std_mean": 7.0430595022763495e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.9375,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 5.539405042304102e-10,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 5.539405042304102e-10,
"signal/volume_coverage_15/centered_abs_mean": 5.539405401044917e-09,
"signal/volume_coverage_15/group_std_mean": 7.0430595022763495e-09,
"signal/volume_coverage_15/group_zero_std_frac": 0.9375,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.539405042304102e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 5.539405042304102e-10,
"signal/volume_coverage_20/centered_abs_mean": 1.066335202914992e-08,
"signal/volume_coverage_20/group_std_mean": 1.3604591753146878e-08,
"signal/volume_coverage_20/group_zero_std_frac": 0.7875,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.0663351941025968e-09,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.0663351941025968e-09,
"signal/volume_coverage_25/centered_abs_mean": 1.540027541602207e-08,
"signal/volume_coverage_25/group_std_mean": 1.962254233545124e-08,
"signal/volume_coverage_25/group_zero_std_frac": 0.75,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.5400275683169485e-09,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.5400275683169485e-09,
"signal/volume_coverage_5/centered_abs_mean": 6.908986494025271e-10,
"signal/volume_coverage_5/group_std_mean": 8.69764738009593e-10,
"signal/volume_coverage_5/group_zero_std_frac": 0.9875,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 6.908987236486918e-11,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 6.908987236486918e-11,
"step": 45
},
{
"calibration/aurc": 0.5383383939508992,
"calibration/batch_distribution_entropy": 0.9907718482645086,
"calibration/buffer_distribution_entropy": 0.9470614664673189,
"calibration/confidence_entropy": 0.5212315823406769,
"calibration/coverage@0%": 0.000392156862745098,
"calibration/coverage@1%": 0.000392156862745098,
"calibration/coverage@10%": 0.000392156862745098,
"calibration/coverage@15%": 0.000392156862745098,
"calibration/coverage@20%": 0.000392156862745098,
"calibration/coverage@25%": 0.0019577145926863897,
"calibration/coverage@30%": 0.0019577145926863897,
"calibration/coverage@5%": 0.000392156862745098,
"calibration/ece": 0.21815604108163839,
"calibration/mean_confidence": 0.5193597284981454,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0017578125,
"completions/max_length": 911.6,
"completions/max_terminated_length": 911.6,
"completions/mean_length": 107.3876953125,
"completions/mean_terminated_length": 107.57828369140626,
"completions/min_length": 0.0,
"completions/min_terminated_length": 42.6,
"epoch": 0.16,
"grad_norm": 0.0022806006018072367,
"learning_rate": 1e-06,
"loss": -0.0024,
"num_tokens": 163786263.0,
"reward": 0.866922116279602,
"reward_std": 0.1387272745370865,
"rewards/accuracy_reward": 0.4404296875,
"rewards/brier_reward": 0.6948660254478455,
"rewards/confidence_uniqueness_reward": 0.9550655364990235,
"rewards/format_reward": 0.99716796875,
"rewards/frontier_aurc_reward": -0.004550308641046286,
"rewards/frontier_ece_reward": -0.00014091167831793429,
"rewards/frontier_entropy_batch_reward": -0.16798928380012512,
"rewards/volume_coverage_0": -2.5247680335294122e-11,
"rewards/volume_coverage_1": -2.5247680335294122e-11,
"rewards/volume_coverage_10": -8.908247889349851e-11,
"rewards/volume_coverage_15": 4.961911284628861e-10,
"rewards/volume_coverage_20": 4.287086199927792e-10,
"rewards/volume_coverage_25": 5.975818465064897e-10,
"rewards/volume_coverage_5": -2.5247680335294122e-11,
"signal/accuracy_reward/centered_abs_mean": 0.16611328125,
"signal/accuracy_reward/group_std_mean": 0.21109898686408995,
"signal/accuracy_reward/group_zero_std_frac": 0.425,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.083056640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.083056640625,
"signal/advantage_abs_mean": 0.10861865431070328,
"signal/advantage_pre_scale_abs_mean": 0.10861865431070328,
"signal/advantage_pre_scale_std": 0.16101040244102477,
"signal/advantage_std": 0.16101040244102477,
"signal/brier_reward/centered_abs_mean": 0.22490673661231994,
"signal/brier_reward/group_std_mean": 0.2718587577342987,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022490674629807472,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.022490674629807472,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015694119967520237,
"signal/confidence_uniqueness_reward/group_std_mean": 0.026635773852467536,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001569412089884281,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001569412089884281,
"signal/format_reward/centered_abs_mean": 0.005352783203125,
"signal/format_reward/group_std_mean": 0.014074762351810932,
"signal/format_reward/group_zero_std_frac": 0.925,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0026763916015625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0026763916015625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002780256886035204,
"signal/frontier_aurc_reward/group_std_mean": 0.003884653048589826,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.475321209407411e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.475321209407411e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.0696753516793251,
"signal/frontier_ece_reward/group_std_mean": 0.09413132518529892,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0069675354287028314,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0069675354287028314,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2549823522567749,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33368061780929564,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02549823671579361,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02549823671579361,
"signal/volume_coverage_0/centered_abs_mean": 1.649701220074462e-09,
"signal/volume_coverage_0/group_std_mean": 2.0688173063554415e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.95,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.6497011742777623e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.6497011742777623e-10,
"signal/volume_coverage_1/centered_abs_mean": 1.649701220074462e-09,
"signal/volume_coverage_1/group_std_mean": 2.0688173063554415e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.95,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.6497011742777623e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.6497011742777623e-10,
"signal/volume_coverage_10/centered_abs_mean": 4.160429378785579e-09,
"signal/volume_coverage_10/group_std_mean": 5.213142106885727e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.95,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.160429022126433e-10,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 4.160429022126433e-10,
"signal/volume_coverage_15/centered_abs_mean": 8.538843809802187e-09,
"signal/volume_coverage_15/group_std_mean": 1.0691787555305866e-08,
"signal/volume_coverage_15/group_zero_std_frac": 0.9,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 8.538843764005488e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 8.538843764005488e-10,
"signal/volume_coverage_20/centered_abs_mean": 1.1193042093182726e-08,
"signal/volume_coverage_20/group_std_mean": 1.4015788185606227e-08,
"signal/volume_coverage_20/group_zero_std_frac": 0.9,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.1193041514478975e-09,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.1193041514478975e-09,
"signal/volume_coverage_25/centered_abs_mean": 1.2908310909054422e-08,
"signal/volume_coverage_25/group_std_mean": 1.6175339656587796e-08,
"signal/volume_coverage_25/group_zero_std_frac": 0.8875,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.290830988626146e-09,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.290830988626146e-09,
"signal/volume_coverage_5/centered_abs_mean": 1.649701220074462e-09,
"signal/volume_coverage_5/group_std_mean": 2.0688173063554415e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.95,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.6497011742777623e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.6497011742777623e-10,
"step": 50
},
{
"epoch": 0.16,
"eval_calibration/aurc": 0.6806264122050393,
"eval_calibration/batch_distribution_entropy": 0.9356194614467648,
"eval_calibration/buffer_distribution_entropy": 0.9527854156241127,
"eval_calibration/confidence_entropy": 0.5131242003432436,
"eval_calibration/coverage@0%": 0.0,
"eval_calibration/coverage@1%": 0.0,
"eval_calibration/coverage@10%": 0.0,
"eval_calibration/coverage@15%": 0.0,
"eval_calibration/coverage@20%": 0.0,
"eval_calibration/coverage@25%": 0.0,
"eval_calibration/coverage@30%": 0.0,
"eval_calibration/coverage@5%": 0.0,
"eval_calibration/ece": 0.36174847179112873,
"eval_calibration/mean_confidence": 0.5375307711314923,
"eval_completions/clipped_ratio": 0.001953125,
"eval_completions/max_length": 500.25,
"eval_completions/max_terminated_length": 500.25,
"eval_completions/mean_length": 114.09334564208984,
"eval_completions/mean_terminated_length": 114.30531311035156,
"eval_completions/min_length": 38.75,
"eval_completions/min_terminated_length": 52.25,
"eval_loss": 0.0,
"eval_num_tokens": 163786263.0,
"eval_reward": 0.7351007908582687,
"eval_reward_std": 0.25359319150447845,
"eval_rewards/accuracy_reward": 0.357421875,
"eval_rewards/brier_reward": 0.6816919445991516,
"eval_rewards/confidence_uniqueness_reward": 0.9017924666404724,
"eval_rewards/format_reward": 0.998046875,
"eval_rewards/frontier_aurc_reward": -0.0053580960957333446,
"eval_rewards/frontier_ece_reward": -0.011103931348770857,
"eval_rewards/frontier_entropy_batch_reward": -0.998046875,
"eval_rewards/volume_coverage_0": 1.9699982478638134e-09,
"eval_rewards/volume_coverage_1": 1.9699982478638134e-09,
"eval_rewards/volume_coverage_10": 8.171635479392592e-09,
"eval_rewards/volume_coverage_15": 1.0906613390204711e-08,
"eval_rewards/volume_coverage_20": 2.7413089020988934e-08,
"eval_rewards/volume_coverage_25": 4.180572066303512e-08,
"eval_rewards/volume_coverage_5": 1.9699982478638134e-09,
"eval_runtime": 32.3428,
"eval_samples_per_second": 15.459,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4473876953125,
"eval_signal/accuracy_reward/group_std_mean": 0.47973204404115677,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22369384765625,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22369384765625,
"eval_signal/advantage_abs_mean": 0.23301321640610695,
"eval_signal/advantage_pre_scale_abs_mean": 0.23301321640610695,
"eval_signal/advantage_pre_scale_std": 0.25110187008976936,
"eval_signal/advantage_std": 0.25110187008976936,
"eval_signal/brier_reward/centered_abs_mean": 0.24164819344878197,
"eval_signal/brier_reward/group_std_mean": 0.2891644388437271,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02416481962427497,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02416481962427497,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.03665702510625124,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04689502716064453,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00366570265032351,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00366570265032351,
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003946851065848023,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.005488026305101812,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.9335638323100284e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.9335638323100284e-05,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.07612928375601768,
"eval_signal/frontier_ece_reward/group_std_mean": 0.10946918278932571,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007612928398884833,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007612928398884833,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0037841796875,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.011048543266952038,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9375,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0003784179862122983,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0003784179862122983,
"eval_signal/volume_coverage_0/centered_abs_mean": 6.782286787743175e-09,
"eval_signal/volume_coverage_0/group_std_mean": 9.168166559270219e-09,
"eval_signal/volume_coverage_0/group_zero_std_frac": 0.8125,
"eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.78228681549875e-10,
"eval_signal/volume_coverage_0/weight": 0.10000000149011612,
"eval_signal/volume_coverage_0/weighted_centered_abs_mean": 6.78228681549875e-10,
"eval_signal/volume_coverage_1/centered_abs_mean": 6.782286787743175e-09,
"eval_signal/volume_coverage_1/group_std_mean": 9.168166559270219e-09,
"eval_signal/volume_coverage_1/group_zero_std_frac": 0.8125,
"eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.78228681549875e-10,
"eval_signal/volume_coverage_1/weight": 0.10000000149011612,
"eval_signal/volume_coverage_1/weighted_centered_abs_mean": 6.78228681549875e-10,
"eval_signal/volume_coverage_10/centered_abs_mean": 2.1339159606004188e-08,
"eval_signal/volume_coverage_10/group_std_mean": 2.773909602016289e-08,
"eval_signal/volume_coverage_10/group_zero_std_frac": 0.75,
"eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.133916078561615e-09,
"eval_signal/volume_coverage_10/weight": 0.10000000149011612,
"eval_signal/volume_coverage_10/weighted_centered_abs_mean": 2.133916078561615e-09,
"eval_signal/volume_coverage_15/centered_abs_mean": 2.9500576481655827e-08,
"eval_signal/volume_coverage_15/group_std_mean": 3.8776015842678646e-08,
"eval_signal/volume_coverage_15/group_zero_std_frac": 0.625,
"eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.950057677308937e-09,
"eval_signal/volume_coverage_15/weight": 0.10000000149011612,
"eval_signal/volume_coverage_15/weighted_centered_abs_mean": 2.950057677308937e-09,
"eval_signal/volume_coverage_20/centered_abs_mean": 8.900886916407558e-08,
"eval_signal/volume_coverage_20/group_std_mean": 1.1780899766833386e-07,
"eval_signal/volume_coverage_20/group_zero_std_frac": 0.4375,
"eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 8.900887110696587e-09,
"eval_signal/volume_coverage_20/weight": 0.10000000149011612,
"eval_signal/volume_coverage_20/weighted_centered_abs_mean": 8.900887110696587e-09,
"eval_signal/volume_coverage_25/centered_abs_mean": 1.415008363814252e-07,
"eval_signal/volume_coverage_25/group_std_mean": 1.8812330493389595e-07,
"eval_signal/volume_coverage_25/group_zero_std_frac": 0.375,
"eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.415008371585813e-08,
"eval_signal/volume_coverage_25/weight": 0.10000000149011612,
"eval_signal/volume_coverage_25/weighted_centered_abs_mean": 1.415008371585813e-08,
"eval_signal/volume_coverage_5/centered_abs_mean": 6.782286787743175e-09,
"eval_signal/volume_coverage_5/group_std_mean": 9.168166559270219e-09,
"eval_signal/volume_coverage_5/group_zero_std_frac": 0.8125,
"eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 6.78228681549875e-10,
"eval_signal/volume_coverage_5/weight": 0.10000000149011612,
"eval_signal/volume_coverage_5/weighted_centered_abs_mean": 6.78228681549875e-10,
"eval_steps_per_second": 0.124,
"step": 50
},
{
"calibration/aurc": 0.513497304567586,
"calibration/batch_distribution_entropy": 0.9930806560395098,
"calibration/buffer_distribution_entropy": 0.9559643505400857,
"calibration/confidence_entropy": 0.5101240746310837,
"calibration/coverage@0%": 0.00078125,
"calibration/coverage@1%": 0.00078125,
"calibration/coverage@10%": 0.00078125,
"calibration/coverage@15%": 0.00078125,
"calibration/coverage@20%": 0.00078125,
"calibration/coverage@25%": 0.00078125,
"calibration/coverage@30%": 0.00078125,
"calibration/coverage@5%": 0.00078125,
"calibration/ece": 0.23066903954651802,
"calibration/mean_confidence": 0.5127820021193525,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00087890625,
"completions/max_length": 934.2,
"completions/max_terminated_length": 934.2,
"completions/mean_length": 117.21943359375,
"completions/mean_terminated_length": 117.32320404052734,
"completions/min_length": 0.0,
"completions/min_terminated_length": 45.8,
"epoch": 0.176,
"grad_norm": 0.001983851892873645,
"learning_rate": 1e-06,
"loss": -0.0012,
"num_tokens": 180223710.0,
"reward": 0.8638802170753479,
"reward_std": 0.13377538919448853,
"rewards/accuracy_reward": 0.43115234375,
"rewards/brier_reward": 0.6899467229843139,
"rewards/confidence_uniqueness_reward": 0.9573141932487488,
"rewards/format_reward": 0.99873046875,
"rewards/frontier_aurc_reward": -0.004610071796923876,
"rewards/frontier_ece_reward": -0.002095718286000192,
"rewards/frontier_entropy_batch_reward": -0.15520085394382477,
"rewards/volume_coverage_0": 9.170957243620492e-10,
"rewards/volume_coverage_1": 9.170957243620492e-10,
"rewards/volume_coverage_10": 9.170957243620492e-10,
"rewards/volume_coverage_15": 3.3474374161457377e-09,
"rewards/volume_coverage_20": 4.455243285406141e-09,
"rewards/volume_coverage_25": 1.2006583771650981e-08,
"rewards/volume_coverage_5": 9.170957243620492e-10,
"signal/accuracy_reward/centered_abs_mean": 0.159344482421875,
"signal/accuracy_reward/group_std_mean": 0.20625897049903869,
"signal/accuracy_reward/group_zero_std_frac": 0.425,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0796722412109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0796722412109375,
"signal/advantage_abs_mean": 0.10355194658041,
"signal/advantage_pre_scale_abs_mean": 0.10355194658041,
"signal/advantage_pre_scale_std": 0.1553642988204956,
"signal/advantage_std": 0.1553642988204956,
"signal/brier_reward/centered_abs_mean": 0.23099083304405213,
"signal/brier_reward/group_std_mean": 0.27860647439956665,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023099084198474885,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.023099084198474885,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01309113372117281,
"signal/confidence_uniqueness_reward/group_std_mean": 0.020391806587576868,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013091133907437325,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013091133907437325,
"signal/format_reward/centered_abs_mean": 0.002459716796875,
"signal/format_reward/group_std_mean": 0.007181552983820438,
"signal/format_reward/group_zero_std_frac": 0.959375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012298583984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0012298583984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029003784526139497,
"signal/frontier_aurc_reward/group_std_mean": 0.0041275909170508385,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.625473182182759e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.625473182182759e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.07209287136793137,
"signal/frontier_ece_reward/group_std_mean": 0.09668067246675491,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007209287490695715,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007209287490695715,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24166457653045653,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3223122775554657,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02416645921766758,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02416645921766758,
"signal/volume_coverage_0/centered_abs_mean": 2.355328121428357e-09,
"signal/volume_coverage_0/group_std_mean": 2.991094849580378e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.940625,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.355328147796154e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 2.355328147796154e-10,
"signal/volume_coverage_1/centered_abs_mean": 2.355328121428357e-09,
"signal/volume_coverage_1/group_std_mean": 2.991094849580378e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.940625,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.355328147796154e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 2.355328147796154e-10,
"signal/volume_coverage_10/centered_abs_mean": 2.355328121428357e-09,
"signal/volume_coverage_10/group_std_mean": 2.991094849580378e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.940625,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.355328147796154e-10,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.355328147796154e-10,
"signal/volume_coverage_15/centered_abs_mean": 1.483570165605741e-08,
"signal/volume_coverage_15/group_std_mean": 1.9023410724461343e-08,
"signal/volume_coverage_15/group_zero_std_frac": 0.890625,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.4835702936977225e-09,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.4835702936977225e-09,
"signal/volume_coverage_20/centered_abs_mean": 1.9023089892211686e-08,
"signal/volume_coverage_20/group_std_mean": 2.437677687483486e-08,
"signal/volume_coverage_20/group_zero_std_frac": 0.765625,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.9023090734593405e-09,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.9023090734593405e-09,
"signal/volume_coverage_25/centered_abs_mean": 4.2793483723269075e-08,
"signal/volume_coverage_25/group_std_mean": 5.4967429463648186e-08,
"signal/volume_coverage_25/group_zero_std_frac": 0.746875,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 4.279348483210432e-09,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 4.279348483210432e-09,
"signal/volume_coverage_5/centered_abs_mean": 2.355328121428357e-09,
"signal/volume_coverage_5/group_std_mean": 2.991094849580378e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.940625,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.355328147796154e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.355328147796154e-10,
"step": 55
},
{
"calibration/aurc": 0.43183402035041557,
"calibration/batch_distribution_entropy": 0.9931826995129007,
"calibration/buffer_distribution_entropy": 0.9634033378659635,
"calibration/confidence_entropy": 0.5033983537357946,
"calibration/coverage@0%": 0.004696673189823875,
"calibration/coverage@1%": 0.004696673189823875,
"calibration/coverage@10%": 0.006653620352250489,
"calibration/coverage@15%": 0.006653620352250489,
"calibration/coverage@20%": 0.011741682974559686,
"calibration/coverage@25%": 0.04227005870841487,
"calibration/coverage@30%": 0.050097847358121325,
"calibration/coverage@5%": 0.004696673189823875,
"calibration/ece": 0.17537742447414378,
"calibration/mean_confidence": 0.47045836480735737,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.001171875,
"completions/max_length": 660.6,
"completions/max_terminated_length": 660.6,
"completions/mean_length": 123.246875,
"completions/mean_terminated_length": 123.39363555908203,
"completions/min_length": 8.2,
"completions/min_terminated_length": 47.8,
"epoch": 0.192,
"grad_norm": 0.001683655777014792,
"learning_rate": 1e-06,
"loss": -0.001,
"num_tokens": 196300574.0,
"reward": 0.888569688796997,
"reward_std": 0.12694563418626786,
"rewards/accuracy_reward": 0.47666015625,
"rewards/brier_reward": 0.6981294512748718,
"rewards/confidence_uniqueness_reward": 0.9559247612953186,
"rewards/format_reward": 0.9984375,
"rewards/frontier_aurc_reward": -0.004030398419126868,
"rewards/frontier_ece_reward": 0.006262005632743239,
"rewards/frontier_entropy_batch_reward": -0.14960378110408784,
"rewards/volume_coverage_0": 3.1163899759378835e-11,
"rewards/volume_coverage_1": 3.1163899759378835e-11,
"rewards/volume_coverage_10": 3.1163899759378835e-11,
"rewards/volume_coverage_15": 3.8662728279320604e-10,
"rewards/volume_coverage_20": 5.279858298831708e-10,
"rewards/volume_coverage_25": 9.922049041752312e-10,
"rewards/volume_coverage_5": 3.1163899759378835e-11,
"signal/accuracy_reward/centered_abs_mean": 0.146868896484375,
"signal/accuracy_reward/group_std_mean": 0.1948981136083603,
"signal/accuracy_reward/group_zero_std_frac": 0.4375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0734344482421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0734344482421875,
"signal/advantage_abs_mean": 0.09662168025970459,
"signal/advantage_pre_scale_abs_mean": 0.09662168025970459,
"signal/advantage_pre_scale_std": 0.14739495515823364,
"signal/advantage_std": 0.14739495515823364,
"signal/brier_reward/centered_abs_mean": 0.23383308947086334,
"signal/brier_reward/group_std_mean": 0.28255713582038877,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023383309692144395,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.023383309692144395,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013910266570746899,
"signal/confidence_uniqueness_reward/group_std_mean": 0.022051481157541276,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013910266570746899,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013910266570746899,
"signal/format_reward/centered_abs_mean": 0.00301513671875,
"signal/format_reward/group_std_mean": 0.008502526301890611,
"signal/format_reward/group_zero_std_frac": 0.953125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001507568359375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001507568359375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026706780306994917,
"signal/frontier_aurc_reward/group_std_mean": 0.0038585856091231108,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.338347669341601e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.338347669341601e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.06825752407312394,
"signal/frontier_ece_reward/group_std_mean": 0.09220706075429916,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006825752649456262,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006825752649456262,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23939733803272248,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3223705470561981,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02393973395228386,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02393973395228386,
"signal/volume_coverage_0/centered_abs_mean": 4.1413477647189725e-10,
"signal/volume_coverage_0/group_std_mean": 5.290546389868922e-10,
"signal/volume_coverage_0/group_zero_std_frac": 1.0,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.141347924313532e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 4.141347924313532e-11,
"signal/volume_coverage_1/centered_abs_mean": 4.1413477647189725e-10,
"signal/volume_coverage_1/group_std_mean": 5.290546389868922e-10,
"signal/volume_coverage_1/group_zero_std_frac": 1.0,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.141347924313532e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 4.141347924313532e-11,
"signal/volume_coverage_10/centered_abs_mean": 4.1413477647189725e-10,
"signal/volume_coverage_10/group_std_mean": 5.290546389868922e-10,
"signal/volume_coverage_10/group_zero_std_frac": 1.0,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.141347924313532e-11,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 4.141347924313532e-11,
"signal/volume_coverage_15/centered_abs_mean": 2.613625393887986e-09,
"signal/volume_coverage_15/group_std_mean": 3.2837682240050014e-09,
"signal/volume_coverage_15/group_zero_std_frac": 0.903125,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.6136253682140785e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.6136253682140785e-10,
"signal/volume_coverage_20/centered_abs_mean": 3.437437523512443e-09,
"signal/volume_coverage_20/group_std_mean": 4.3065619848015045e-09,
"signal/volume_coverage_20/group_zero_std_frac": 0.903125,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.437437275793931e-10,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 3.437437275793931e-10,
"signal/volume_coverage_25/centered_abs_mean": 5.901994704071711e-09,
"signal/volume_coverage_25/group_std_mean": 7.4068437938556995e-09,
"signal/volume_coverage_25/group_zero_std_frac": 0.846875,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 5.90199494485133e-10,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 5.90199494485133e-10,
"signal/volume_coverage_5/centered_abs_mean": 4.1413477647189725e-10,
"signal/volume_coverage_5/group_std_mean": 5.290546389868922e-10,
"signal/volume_coverage_5/group_zero_std_frac": 1.0,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.141347924313532e-11,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 4.141347924313532e-11,
"step": 60
},
{
"calibration/aurc": 0.3976681614269202,
"calibration/batch_distribution_entropy": 0.9959729508671309,
"calibration/buffer_distribution_entropy": 0.9696937379789341,
"calibration/confidence_entropy": 0.5022068227163163,
"calibration/coverage@0%": 0.004691322162426614,
"calibration/coverage@1%": 0.004691322162426614,
"calibration/coverage@10%": 0.010550697162426615,
"calibration/coverage@15%": 0.021878822162426613,
"calibration/coverage@20%": 0.11690083781800391,
"calibration/coverage@25%": 0.25801278131115457,
"calibration/coverage@30%": 0.4043052837573386,
"calibration/coverage@5%": 0.004691322162426614,
"calibration/ece": 0.22137551214873522,
"calibration/mean_confidence": 0.49639520530660997,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00107421875,
"completions/max_length": 909.6,
"completions/max_terminated_length": 909.6,
"completions/mean_length": 129.9103515625,
"completions/mean_terminated_length": 130.04986267089845,
"completions/min_length": 0.0,
"completions/min_terminated_length": 51.0,
"epoch": 0.208,
"grad_norm": 0.0016923915827646852,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 212663080.0,
"reward": 0.9061551690101624,
"reward_std": 0.1243842214345932,
"rewards/accuracy_reward": 0.50966796875,
"rewards/brier_reward": 0.702867615222931,
"rewards/confidence_uniqueness_reward": 0.9556405544281006,
"rewards/format_reward": 0.9982421875,
"rewards/frontier_aurc_reward": -0.003709340374916792,
"rewards/frontier_ece_reward": 0.009891654085367917,
"rewards/frontier_entropy_batch_reward": -0.14593522846698762,
"rewards/volume_coverage_0": 2.715153962773442e-10,
"rewards/volume_coverage_1": 2.715153962773442e-10,
"rewards/volume_coverage_10": 4.856030966227109e-10,
"rewards/volume_coverage_15": 1.0036363989884834e-09,
"rewards/volume_coverage_20": 1.3117872517398599e-09,
"rewards/volume_coverage_25": 1.9953037999620447e-09,
"rewards/volume_coverage_5": 2.715153962773442e-10,
"signal/accuracy_reward/centered_abs_mean": 0.142767333984375,
"signal/accuracy_reward/group_std_mean": 0.1889306128025055,
"signal/accuracy_reward/group_zero_std_frac": 0.4625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0713836669921875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0713836669921875,
"signal/advantage_abs_mean": 0.09461160451173782,
"signal/advantage_pre_scale_abs_mean": 0.09461160451173782,
"signal/advantage_pre_scale_std": 0.14617311358451843,
"signal/advantage_std": 0.14617311358451843,
"signal/brier_reward/centered_abs_mean": 0.22771627008914946,
"signal/brier_reward/group_std_mean": 0.2767902910709381,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022771627083420752,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.022771627083420752,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013186541199684144,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02205616645514965,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013186540920287371,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013186540920287371,
"signal/format_reward/centered_abs_mean": 0.00340576171875,
"signal/format_reward/group_std_mean": 0.009943688940256833,
"signal/format_reward/group_zero_std_frac": 0.94375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001702880859375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001702880859375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026509815361350775,
"signal/frontier_aurc_reward/group_std_mean": 0.0038590433076024057,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3137269201688466e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3137269201688466e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.06532840430736542,
"signal/frontier_ece_reward/group_std_mean": 0.08815628290176392,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006532840337604285,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006532840337604285,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23155330419540404,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.31283451318740846,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02315533086657524,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02315533086657524,
"signal/volume_coverage_0/centered_abs_mean": 1.2957021122694811e-09,
"signal/volume_coverage_0/group_std_mean": 1.6053079698874484e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.946875,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.2957022017812124e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.2957022017812124e-10,
"signal/volume_coverage_1/centered_abs_mean": 1.2957021122694811e-09,
"signal/volume_coverage_1/group_std_mean": 1.6053079698874484e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.946875,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.2957022017812124e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.2957022017812124e-10,
"signal/volume_coverage_10/centered_abs_mean": 2.607133697818398e-09,
"signal/volume_coverage_10/group_std_mean": 3.2581726594482776e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.896875,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.60713383173905e-10,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.60713383173905e-10,
"signal/volume_coverage_15/centered_abs_mean": 4.622650684460438e-09,
"signal/volume_coverage_15/group_std_mean": 5.792726703868212e-09,
"signal/volume_coverage_15/group_zero_std_frac": 0.8,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 4.6226512097347073e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 4.6226512097347073e-10,
"signal/volume_coverage_20/centered_abs_mean": 7.073875726848477e-09,
"signal/volume_coverage_20/group_std_mean": 8.8870939773944e-09,
"signal/volume_coverage_20/group_zero_std_frac": 0.746875,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 7.073875627622295e-10,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 7.073875627622295e-10,
"signal/volume_coverage_25/centered_abs_mean": 9.611973039724831e-09,
"signal/volume_coverage_25/group_std_mean": 1.2051949904723358e-08,
"signal/volume_coverage_25/group_zero_std_frac": 0.746875,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 9.611973272871665e-10,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 9.611973272871665e-10,
"signal/volume_coverage_5/centered_abs_mean": 1.2957021122694811e-09,
"signal/volume_coverage_5/group_std_mean": 1.6053079698874484e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.946875,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.2957022017812124e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.2957022017812124e-10,
"step": 65
},
{
"calibration/aurc": 0.3912217986078258,
"calibration/batch_distribution_entropy": 0.9941200245807609,
"calibration/buffer_distribution_entropy": 0.9744661505967608,
"calibration/confidence_entropy": 0.514071657701208,
"calibration/coverage@0%": 0.0062660680710640414,
"calibration/coverage@1%": 0.0062660680710640414,
"calibration/coverage@10%": 0.0062660680710640414,
"calibration/coverage@15%": 0.019191919918652392,
"calibration/coverage@20%": 0.09790743651912819,
"calibration/coverage@25%": 0.1472402878103296,
"calibration/coverage@30%": 0.2897601330772035,
"calibration/coverage@5%": 0.0062660680710640414,
"calibration/ece": 0.1981607008778328,
"calibration/mean_confidence": 0.494655158586494,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0013671875,
"completions/max_length": 772.2,
"completions/max_terminated_length": 772.2,
"completions/mean_length": 133.83466796875,
"completions/mean_terminated_length": 134.01897583007812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 53.2,
"epoch": 0.224,
"grad_norm": 0.0014168552588671446,
"learning_rate": 1e-06,
"loss": -0.0017,
"num_tokens": 229186731.0,
"reward": 0.8921693325042724,
"reward_std": 0.11579417437314987,
"rewards/accuracy_reward": 0.48359375,
"rewards/brier_reward": 0.7135935187339782,
"rewards/confidence_uniqueness_reward": 0.9547363996505738,
"rewards/format_reward": 0.99833984375,
"rewards/frontier_aurc_reward": -0.003805333934724331,
"rewards/frontier_ece_reward": 0.008672526269219816,
"rewards/frontier_entropy_batch_reward": -0.16450113654136658,
"rewards/volume_coverage_0": 7.487820780083254e-11,
"rewards/volume_coverage_1": 7.487820780083254e-11,
"rewards/volume_coverage_10": 7.487820780083254e-11,
"rewards/volume_coverage_15": 5.375929285067471e-10,
"rewards/volume_coverage_20": 7.936750173742624e-10,
"rewards/volume_coverage_25": 1.4622947658304851e-09,
"rewards/volume_coverage_5": 7.487820780083254e-11,
"signal/accuracy_reward/centered_abs_mean": 0.12554931640625,
"signal/accuracy_reward/group_std_mean": 0.16789826452732087,
"signal/accuracy_reward/group_zero_std_frac": 0.515625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.062774658203125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.062774658203125,
"signal/advantage_abs_mean": 0.088116155564785,
"signal/advantage_pre_scale_abs_mean": 0.088116155564785,
"signal/advantage_pre_scale_std": 0.1376793324947357,
"signal/advantage_std": 0.1376793324947357,
"signal/brier_reward/centered_abs_mean": 0.21528012156486512,
"signal/brier_reward/group_std_mean": 0.26321386098861693,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021528012305498122,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.021528012305498122,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01388053372502327,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02223154343664646,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013880533864721657,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013880533864721657,
"signal/format_reward/centered_abs_mean": 0.003204345703125,
"signal/format_reward/group_std_mean": 0.009054953418672084,
"signal/format_reward/group_zero_std_frac": 0.95,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0016021728515625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0016021728515625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00263366037979722,
"signal/frontier_aurc_reward/group_std_mean": 0.0038668750785291195,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.292075634817593e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.292075634817593e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.060927893966436386,
"signal/frontier_ece_reward/group_std_mean": 0.08261417448520661,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006092789676040411,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006092789676040411,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2596702575683594,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34350005388259885,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02596702575683594,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02596702575683594,
"signal/volume_coverage_0/centered_abs_mean": 2.6789099966118e-10,
"signal/volume_coverage_0/group_std_mean": 3.3500919399687135e-10,
"signal/volume_coverage_0/group_zero_std_frac": 1.0,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.6789100625312922e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 2.6789100625312922e-11,
"signal/volume_coverage_1/centered_abs_mean": 2.6789099966118e-10,
"signal/volume_coverage_1/group_std_mean": 3.3500919399687135e-10,
"signal/volume_coverage_1/group_zero_std_frac": 1.0,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.6789100625312922e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 2.6789100625312922e-11,
"signal/volume_coverage_10/centered_abs_mean": 2.6789099966118e-10,
"signal/volume_coverage_10/group_std_mean": 3.3500919399687135e-10,
"signal/volume_coverage_10/group_zero_std_frac": 1.0,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.6789100625312922e-11,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.6789100625312922e-11,
"signal/volume_coverage_15/centered_abs_mean": 1.3681588584280035e-09,
"signal/volume_coverage_15/group_std_mean": 1.7402577712211098e-09,
"signal/volume_coverage_15/group_zero_std_frac": 0.915625,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.3681589680625272e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.3681589680625272e-10,
"signal/volume_coverage_20/centered_abs_mean": 2.2410811706397025e-09,
"signal/volume_coverage_20/group_std_mean": 2.872985727098154e-09,
"signal/volume_coverage_20/group_zero_std_frac": 0.909375,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.241081313580917e-10,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 2.241081313580917e-10,
"signal/volume_coverage_25/centered_abs_mean": 3.5609483861609446e-09,
"signal/volume_coverage_25/group_std_mean": 4.546949861072846e-09,
"signal/volume_coverage_25/group_zero_std_frac": 0.85,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.5609485707355224e-10,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 3.5609485707355224e-10,
"signal/volume_coverage_5/centered_abs_mean": 2.6789099966118e-10,
"signal/volume_coverage_5/group_std_mean": 3.3500919399687135e-10,
"signal/volume_coverage_5/group_zero_std_frac": 1.0,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.6789100625312922e-11,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.6789100625312922e-11,
"step": 70
},
{
"calibration/aurc": 0.420320349024755,
"calibration/batch_distribution_entropy": 0.9855579386582516,
"calibration/buffer_distribution_entropy": 0.9778695258036263,
"calibration/confidence_entropy": 0.5230157101208323,
"calibration/coverage@0%": 0.000390625,
"calibration/coverage@1%": 0.000390625,
"calibration/coverage@10%": 0.000390625,
"calibration/coverage@15%": 0.10157778864970646,
"calibration/coverage@20%": 0.20821917808219176,
"calibration/coverage@25%": 0.21291585127201565,
"calibration/coverage@30%": 0.21839530332681018,
"calibration/coverage@5%": 0.000390625,
"calibration/ece": 0.20896871328557004,
"calibration/mean_confidence": 0.505650939324179,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00068359375,
"completions/max_length": 643.6,
"completions/max_terminated_length": 643.6,
"completions/mean_length": 139.76630859375,
"completions/mean_terminated_length": 139.8614074707031,
"completions/min_length": 23.2,
"completions/min_terminated_length": 56.4,
"epoch": 0.24,
"grad_norm": 0.0017766956007108092,
"learning_rate": 1e-06,
"loss": -0.0009,
"num_tokens": 245869618.0,
"reward": 0.9217252969741822,
"reward_std": 0.12044314593076706,
"rewards/accuracy_reward": 0.53876953125,
"rewards/brier_reward": 0.7134970784187317,
"rewards/confidence_uniqueness_reward": 0.955771553516388,
"rewards/format_reward": 0.99912109375,
"rewards/frontier_aurc_reward": -0.003404102800413966,
"rewards/frontier_ece_reward": 0.012143013067543507,
"rewards/frontier_entropy_batch_reward": -0.15318614840507508,
"rewards/volume_coverage_0": 3.451310973123789e-11,
"rewards/volume_coverage_1": 3.451310973123789e-11,
"rewards/volume_coverage_10": 3.451310973123789e-11,
"rewards/volume_coverage_15": 3.451310973123789e-11,
"rewards/volume_coverage_20": -3.028895327605863e-09,
"rewards/volume_coverage_25": 1.9700875912576078e-08,
"rewards/volume_coverage_5": 3.451310973123789e-11,
"signal/accuracy_reward/centered_abs_mean": 0.140142822265625,
"signal/accuracy_reward/group_std_mean": 0.18538169860839843,
"signal/accuracy_reward/group_zero_std_frac": 0.465625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0700714111328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0700714111328125,
"signal/advantage_abs_mean": 0.09228953570127488,
"signal/advantage_pre_scale_abs_mean": 0.09228953570127488,
"signal/advantage_pre_scale_std": 0.14287880957126617,
"signal/advantage_std": 0.14287880957126617,
"signal/brier_reward/centered_abs_mean": 0.21166147887706757,
"signal/brier_reward/group_std_mean": 0.2590271383523941,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02116614766418934,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02116614766418934,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012101791240274907,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017954951152205467,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001210179179906845,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001210179179906845,
"signal/format_reward/centered_abs_mean": 0.001702880859375,
"signal/format_reward/group_std_mean": 0.004971844423562288,
"signal/format_reward/group_zero_std_frac": 0.971875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026966645382344723,
"signal/frontier_aurc_reward/group_std_mean": 0.00390788302756846,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3708307819324544e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3708307819324544e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.05930803343653679,
"signal/frontier_ece_reward/group_std_mean": 0.08072617352008819,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0059308033436536785,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0059308033436536785,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23889875411987305,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.317973917722702,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023889876157045364,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023889876157045364,
"signal/volume_coverage_0/centered_abs_mean": 2.0800886846306099e-10,
"signal/volume_coverage_0/group_std_mean": 2.6474508141305366e-10,
"signal/volume_coverage_0/group_zero_std_frac": 1.0,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.0800886707528222e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 2.0800886707528222e-11,
"signal/volume_coverage_1/centered_abs_mean": 2.0800886846306099e-10,
"signal/volume_coverage_1/group_std_mean": 2.6474508141305366e-10,
"signal/volume_coverage_1/group_zero_std_frac": 1.0,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.0800886707528222e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 2.0800886707528222e-11,
"signal/volume_coverage_10/centered_abs_mean": 2.0800886846306099e-10,
"signal/volume_coverage_10/group_std_mean": 2.6474508141305366e-10,
"signal/volume_coverage_10/group_zero_std_frac": 1.0,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.0800886707528222e-11,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.0800886707528222e-11,
"signal/volume_coverage_15/centered_abs_mean": 2.0800886846306099e-10,
"signal/volume_coverage_15/group_std_mean": 2.6474508141305366e-10,
"signal/volume_coverage_15/group_zero_std_frac": 1.0,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.0800886707528222e-11,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.0800886707528222e-11,
"signal/volume_coverage_20/centered_abs_mean": 6.613854078296732e-09,
"signal/volume_coverage_20/group_std_mean": 8.326563194493985e-09,
"signal/volume_coverage_20/group_zero_std_frac": 0.88125,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 6.613853915926615e-10,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 6.613853915926615e-10,
"signal/volume_coverage_25/centered_abs_mean": 5.696032007451723e-08,
"signal/volume_coverage_25/group_std_mean": 7.250977392958547e-08,
"signal/volume_coverage_25/group_zero_std_frac": 0.83125,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 5.696031910029653e-09,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 5.696031910029653e-09,
"signal/volume_coverage_5/centered_abs_mean": 2.0800886846306099e-10,
"signal/volume_coverage_5/group_std_mean": 2.6474508141305366e-10,
"signal/volume_coverage_5/group_zero_std_frac": 1.0,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.0800886707528222e-11,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.0800886707528222e-11,
"step": 75
},
{
"calibration/aurc": 0.3544769332207135,
"calibration/batch_distribution_entropy": 0.9897546231661192,
"calibration/buffer_distribution_entropy": 0.9801283074949945,
"calibration/confidence_entropy": 0.5082607589543467,
"calibration/coverage@0%": 0.002735139432485323,
"calibration/coverage@1%": 0.002735139432485323,
"calibration/coverage@10%": 0.025782014432485324,
"calibration/coverage@15%": 0.03203201443248532,
"calibration/coverage@20%": 0.11328201443248531,
"calibration/coverage@25%": 0.33795942392367906,
"calibration/coverage@30%": 0.47439227617416824,
"calibration/coverage@5%": 0.010547639432485323,
"calibration/ece": 0.176115213516259,
"calibration/mean_confidence": 0.5356183375328162,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00087890625,
"completions/max_length": 524.0,
"completions/max_terminated_length": 524.0,
"completions/mean_length": 144.349609375,
"completions/mean_terminated_length": 144.47740173339844,
"completions/min_length": 11.6,
"completions/min_terminated_length": 57.4,
"epoch": 0.256,
"grad_norm": 0.001462550018914044,
"learning_rate": 1e-06,
"loss": -0.0003,
"num_tokens": 262402574.0,
"reward": 0.9123332858085632,
"reward_std": 0.11308208853006363,
"rewards/accuracy_reward": 0.51884765625,
"rewards/brier_reward": 0.7276524066925049,
"rewards/confidence_uniqueness_reward": 0.9561123728752137,
"rewards/format_reward": 0.99912109375,
"rewards/frontier_aurc_reward": -0.003458845429122448,
"rewards/frontier_ece_reward": 0.015241177752614021,
"rewards/frontier_entropy_batch_reward": -0.16508454382419585,
"rewards/volume_coverage_0": -3.699867013240085e-11,
"rewards/volume_coverage_1": -3.699867013240085e-11,
"rewards/volume_coverage_10": -3.699867013240085e-11,
"rewards/volume_coverage_15": -8.109867156080516e-11,
"rewards/volume_coverage_20": -7.289573838217128e-12,
"rewards/volume_coverage_25": 3.8061882085774904e-11,
"rewards/volume_coverage_5": -3.699867013240085e-11,
"signal/accuracy_reward/centered_abs_mean": 0.124127197265625,
"signal/accuracy_reward/group_std_mean": 0.1660928785800934,
"signal/accuracy_reward/group_zero_std_frac": 0.5125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0620635986328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0620635986328125,
"signal/advantage_abs_mean": 0.08623839318752288,
"signal/advantage_pre_scale_abs_mean": 0.08623839318752288,
"signal/advantage_pre_scale_std": 0.1374477416276932,
"signal/advantage_std": 0.1374477416276932,
"signal/brier_reward/centered_abs_mean": 0.20527395308017732,
"signal/brier_reward/group_std_mean": 0.2524005711078644,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020527396351099014,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.020527396351099014,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012610967457294463,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01848965808749199,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012610967503860592,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012610967503860592,
"signal/format_reward/centered_abs_mean": 0.001702880859375,
"signal/format_reward/group_std_mean": 0.004971844423562288,
"signal/format_reward/group_zero_std_frac": 0.971875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028766633477061987,
"signal/frontier_aurc_reward/group_std_mean": 0.004133455315604806,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.595829330151901e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.595829330151901e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.05877775847911835,
"signal/frontier_ece_reward/group_std_mean": 0.07837124764919282,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0058777758851647375,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0058777758851647375,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2506664037704468,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3321652948856354,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025066639855504037,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025066639855504037,
"signal/volume_coverage_0/centered_abs_mean": 7.425148794426573e-10,
"signal/volume_coverage_0/group_std_mean": 9.40662159276684e-10,
"signal/volume_coverage_0/group_zero_std_frac": 0.9625,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.425148976572537e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 7.425148976572537e-11,
"signal/volume_coverage_1/centered_abs_mean": 7.425148794426573e-10,
"signal/volume_coverage_1/group_std_mean": 9.40662159276684e-10,
"signal/volume_coverage_1/group_zero_std_frac": 0.9625,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.425148976572537e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 7.425148976572537e-11,
"signal/volume_coverage_10/centered_abs_mean": 7.425148794426573e-10,
"signal/volume_coverage_10/group_std_mean": 9.40662159276684e-10,
"signal/volume_coverage_10/group_zero_std_frac": 0.9625,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 7.425148976572537e-11,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 7.425148976572537e-11,
"signal/volume_coverage_15/centered_abs_mean": 8.75433755348487e-10,
"signal/volume_coverage_15/group_std_mean": 1.108826042095501e-09,
"signal/volume_coverage_15/group_zero_std_frac": 0.953125,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 8.75433795767544e-11,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 8.75433795767544e-11,
"signal/volume_coverage_20/centered_abs_mean": 1.2596845698487068e-09,
"signal/volume_coverage_20/group_std_mean": 1.593373660790931e-09,
"signal/volume_coverage_20/group_zero_std_frac": 0.953125,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.2596845936144184e-10,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.2596845936144184e-10,
"signal/volume_coverage_25/centered_abs_mean": 1.3196504354551219e-09,
"signal/volume_coverage_25/group_std_mean": 1.6703829597286557e-09,
"signal/volume_coverage_25/group_zero_std_frac": 0.953125,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.3196504661597274e-10,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.3196504661597274e-10,
"signal/volume_coverage_5/centered_abs_mean": 7.425148794426573e-10,
"signal/volume_coverage_5/group_std_mean": 9.40662159276684e-10,
"signal/volume_coverage_5/group_zero_std_frac": 0.9625,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 7.425148976572537e-11,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 7.425148976572537e-11,
"step": 80
},
{
"calibration/aurc": 0.43225073928840985,
"calibration/batch_distribution_entropy": 0.9909829492064789,
"calibration/buffer_distribution_entropy": 0.9819880995143937,
"calibration/confidence_entropy": 0.5063951814225,
"calibration/coverage@0%": 0.001953125,
"calibration/coverage@1%": 0.001953125,
"calibration/coverage@10%": 0.005078125,
"calibration/coverage@15%": 0.039453125,
"calibration/coverage@20%": 0.063671875,
"calibration/coverage@25%": 0.133984375,
"calibration/coverage@30%": 0.26191023284313725,
"calibration/coverage@5%": 0.001953125,
"calibration/ece": 0.17791232659343512,
"calibration/mean_confidence": 0.5035804291342054,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 533.0,
"completions/max_terminated_length": 533.0,
"completions/mean_length": 152.45693359375,
"completions/mean_terminated_length": 152.53079528808593,
"completions/min_length": 22.8,
"completions/min_terminated_length": 59.4,
"epoch": 0.272,
"grad_norm": 0.0013894687872380018,
"learning_rate": 1e-06,
"loss": -0.0,
"num_tokens": 278929429.0,
"reward": 0.9068776488304138,
"reward_std": 0.11359266936779022,
"rewards/accuracy_reward": 0.505859375,
"rewards/brier_reward": 0.7291383624076844,
"rewards/confidence_uniqueness_reward": 0.9561533212661744,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.0033955852035433055,
"rewards/frontier_ece_reward": 0.014794471859931945,
"rewards/frontier_entropy_batch_reward": -0.15725232064723968,
"rewards/volume_coverage_0": 2.8186108828875955e-11,
"rewards/volume_coverage_1": 2.8186108828875955e-11,
"rewards/volume_coverage_10": 1.9451884721022327e-11,
"rewards/volume_coverage_15": -8.745132416831946e-10,
"rewards/volume_coverage_20": -2.1576839356884924e-09,
"rewards/volume_coverage_25": -2.4406589527181666e-09,
"rewards/volume_coverage_5": 2.8186108828875955e-11,
"signal/accuracy_reward/centered_abs_mean": 0.1293701171875,
"signal/accuracy_reward/group_std_mean": 0.17103227078914643,
"signal/accuracy_reward/group_zero_std_frac": 0.51875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06468505859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06468505859375,
"signal/advantage_abs_mean": 0.08693162500858306,
"signal/advantage_pre_scale_abs_mean": 0.08693162500858306,
"signal/advantage_pre_scale_std": 0.13726737201213837,
"signal/advantage_std": 0.13726737201213837,
"signal/brier_reward/centered_abs_mean": 0.20380387604236602,
"signal/brier_reward/group_std_mean": 0.2516744613647461,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02038038745522499,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02038038745522499,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012038312293589115,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016938690468668936,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001203831285238266,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001203831285238266,
"signal/format_reward/centered_abs_mean": 0.00113525390625,
"signal/format_reward/group_std_mean": 0.0033145629800856113,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002711014449596405,
"signal/frontier_aurc_reward/group_std_mean": 0.003989389818161726,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.388768163858913e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.388768163858913e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.054540529102087024,
"signal/frontier_ece_reward/group_std_mean": 0.0740948662161827,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005454053077846766,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005454053077846766,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24244910776615142,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32230539321899415,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024244911223649978,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024244911223649978,
"signal/volume_coverage_0/centered_abs_mean": 5.712658679435378e-10,
"signal/volume_coverage_0/group_std_mean": 7.124818721115922e-10,
"signal/volume_coverage_0/group_zero_std_frac": 0.971875,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.712658662088144e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 5.712658662088144e-11,
"signal/volume_coverage_1/centered_abs_mean": 5.712658679435378e-10,
"signal/volume_coverage_1/group_std_mean": 7.124818721115922e-10,
"signal/volume_coverage_1/group_zero_std_frac": 0.971875,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.712658662088144e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 5.712658662088144e-11,
"signal/volume_coverage_10/centered_abs_mean": 1.208340172365041e-09,
"signal/volume_coverage_10/group_std_mean": 1.5031356812023056e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.95,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.2083401761814328e-10,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.2083401761814328e-10,
"signal/volume_coverage_15/centered_abs_mean": 5.3549160383692396e-09,
"signal/volume_coverage_15/group_std_mean": 6.828456811103223e-09,
"signal/volume_coverage_15/group_zero_std_frac": 0.859375,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.354915864896892e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 5.354915864896892e-10,
"signal/volume_coverage_20/centered_abs_mean": 1.0593179478046721e-08,
"signal/volume_coverage_20/group_std_mean": 1.35569361847665e-08,
"signal/volume_coverage_20/group_zero_std_frac": 0.853125,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.0593180221896148e-09,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.0593180221896148e-09,
"signal/volume_coverage_25/centered_abs_mean": 1.180654151111682e-08,
"signal/volume_coverage_25/group_std_mean": 1.5109510609345732e-08,
"signal/volume_coverage_25/group_zero_std_frac": 0.853125,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.1806541300174445e-09,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.1806541300174445e-09,
"signal/volume_coverage_5/centered_abs_mean": 5.712658679435378e-10,
"signal/volume_coverage_5/group_std_mean": 7.124818721115922e-10,
"signal/volume_coverage_5/group_zero_std_frac": 0.971875,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 5.712658662088144e-11,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 5.712658662088144e-11,
"step": 85
},
{
"calibration/aurc": 0.4088050668355857,
"calibration/batch_distribution_entropy": 0.9935028330405942,
"calibration/buffer_distribution_entropy": 0.9839995191322982,
"calibration/confidence_entropy": 0.5062243662924341,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.004296875,
"calibration/coverage@25%": 0.10625,
"calibration/coverage@30%": 0.15546875,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.17225932680606057,
"calibration/mean_confidence": 0.5062148171178394,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 854.4,
"completions/max_terminated_length": 854.4,
"completions/mean_length": 154.471484375,
"completions/mean_terminated_length": 154.5472412109375,
"completions/min_length": 23.4,
"completions/min_terminated_length": 60.0,
"epoch": 0.288,
"grad_norm": 0.001574499299749732,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 295469393.0,
"reward": 0.9067415595054626,
"reward_std": 0.1190925344824791,
"rewards/accuracy_reward": 0.50576171875,
"rewards/brier_reward": 0.7334963202476501,
"rewards/confidence_uniqueness_reward": 0.9556491851806641,
"rewards/format_reward": 0.99912109375,
"rewards/frontier_aurc_reward": -0.003264536615461111,
"rewards/frontier_ece_reward": 0.015714940335601568,
"rewards/frontier_entropy_batch_reward": -0.16145087778568268,
"rewards/volume_coverage_0": 2.3124501792592954e-11,
"rewards/volume_coverage_1": 2.3124501792592954e-11,
"rewards/volume_coverage_10": 8.063035152744425e-11,
"rewards/volume_coverage_15": 5.572204008971737e-10,
"rewards/volume_coverage_20": 1.259253810080574e-09,
"rewards/volume_coverage_25": 8.726174105522234e-09,
"rewards/volume_coverage_5": 2.3124501792592954e-11,
"signal/accuracy_reward/centered_abs_mean": 0.140118408203125,
"signal/accuracy_reward/group_std_mean": 0.18722763955593108,
"signal/accuracy_reward/group_zero_std_frac": 0.4625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0700592041015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0700592041015625,
"signal/advantage_abs_mean": 0.09046539664268494,
"signal/advantage_pre_scale_abs_mean": 0.09046539664268494,
"signal/advantage_pre_scale_std": 0.14088937640190125,
"signal/advantage_std": 0.14088937640190125,
"signal/brier_reward/centered_abs_mean": 0.20394990146160125,
"signal/brier_reward/group_std_mean": 0.2513652205467224,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02039499022066593,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02039499022066593,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012927304022014141,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01885262057185173,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001292730402201414,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001292730402201414,
"signal/format_reward/centered_abs_mean": 0.001702880859375,
"signal/format_reward/group_std_mean": 0.0049718443769961596,
"signal/format_reward/group_zero_std_frac": 0.971875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026389469858258963,
"signal/frontier_aurc_reward/group_std_mean": 0.003926029847934842,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.2986837322823706e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.2986837322823706e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.05253084748983383,
"signal/frontier_ece_reward/group_std_mean": 0.07113655209541321,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005253084935247898,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005253084935247898,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2522804230451584,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3346827507019043,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025228042155504227,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025228042155504227,
"signal/volume_coverage_0/centered_abs_mean": 2.599262319269435e-10,
"signal/volume_coverage_0/group_std_mean": 3.3370869817472835e-10,
"signal/volume_coverage_0/group_zero_std_frac": 1.0,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.5992624198833968e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 2.5992624198833968e-11,
"signal/volume_coverage_1/centered_abs_mean": 2.599262319269435e-10,
"signal/volume_coverage_1/group_std_mean": 3.3370869817472835e-10,
"signal/volume_coverage_1/group_zero_std_frac": 1.0,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.5992624198833968e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 2.5992624198833968e-11,
"signal/volume_coverage_10/centered_abs_mean": 7.917605640561653e-10,
"signal/volume_coverage_10/group_std_mean": 1.0258700486320826e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.959375,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 7.917605408108708e-11,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 7.917605408108708e-11,
"signal/volume_coverage_15/centered_abs_mean": 1.934186673324678e-09,
"signal/volume_coverage_15/group_std_mean": 2.4541597212768274e-09,
"signal/volume_coverage_15/group_zero_std_frac": 0.871875,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.934186696223028e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.934186696223028e-10,
"signal/volume_coverage_20/centered_abs_mean": 3.4954434416367697e-09,
"signal/volume_coverage_20/group_std_mean": 4.4067209895026774e-09,
"signal/volume_coverage_20/group_zero_std_frac": 0.85625,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.495443653273034e-10,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 3.495443653273034e-10,
"signal/volume_coverage_25/centered_abs_mean": 1.7893334902652215e-08,
"signal/volume_coverage_25/group_std_mean": 2.2423871748333824e-08,
"signal/volume_coverage_25/group_zero_std_frac": 0.753125,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.7893335507723761e-09,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.7893335507723761e-09,
"signal/volume_coverage_5/centered_abs_mean": 2.599262319269435e-10,
"signal/volume_coverage_5/group_std_mean": 3.3370869817472835e-10,
"signal/volume_coverage_5/group_zero_std_frac": 1.0,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.5992624198833968e-11,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.5992624198833968e-11,
"step": 90
},
{
"calibration/aurc": 0.3437908959750037,
"calibration/batch_distribution_entropy": 0.9911090733037147,
"calibration/buffer_distribution_entropy": 0.9855293379549626,
"calibration/confidence_entropy": 0.5054607477470774,
"calibration/coverage@0%": 0.002734375,
"calibration/coverage@1%": 0.002734375,
"calibration/coverage@10%": 0.026953125,
"calibration/coverage@15%": 0.07305759803921569,
"calibration/coverage@20%": 0.16379136029411764,
"calibration/coverage@25%": 0.23336856617647062,
"calibration/coverage@30%": 0.3679718137254902,
"calibration/coverage@5%": 0.002734375,
"calibration/ece": 0.1264503527833825,
"calibration/mean_confidence": 0.5267013760163815,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 521.4,
"completions/max_terminated_length": 521.4,
"completions/mean_length": 157.44130859375,
"completions/mean_terminated_length": 157.533837890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 55.8,
"epoch": 0.304,
"grad_norm": 0.0015454553067684174,
"learning_rate": 1e-06,
"loss": -0.0009,
"num_tokens": 312011544.0,
"reward": 0.9047148585319519,
"reward_std": 0.11162041127681732,
"rewards/accuracy_reward": 0.4994140625,
"rewards/brier_reward": 0.7336631774902344,
"rewards/confidence_uniqueness_reward": 0.9567294597625733,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.003355294652283192,
"rewards/frontier_ece_reward": 0.014690110087394714,
"rewards/frontier_entropy_batch_reward": -0.15116712749004363,
"rewards/volume_coverage_0": 7.067736534915881e-11,
"rewards/volume_coverage_1": 7.067736534915881e-11,
"rewards/volume_coverage_10": 7.067736534915881e-11,
"rewards/volume_coverage_15": 2.0040456655268102e-10,
"rewards/volume_coverage_20": 2.8201859181287147e-10,
"rewards/volume_coverage_25": 3.96269733693444e-10,
"rewards/volume_coverage_5": 7.067736534915881e-11,
"signal/accuracy_reward/centered_abs_mean": 0.13067626953125,
"signal/accuracy_reward/group_std_mean": 0.16918764114379883,
"signal/accuracy_reward/group_zero_std_frac": 0.521875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.065338134765625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.065338134765625,
"signal/advantage_abs_mean": 0.08662759363651276,
"signal/advantage_pre_scale_abs_mean": 0.08662759363651276,
"signal/advantage_pre_scale_std": 0.13579559773206712,
"signal/advantage_std": 0.13579559773206712,
"signal/brier_reward/centered_abs_mean": 0.19534237086772918,
"signal/brier_reward/group_std_mean": 0.24365437030792236,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019534237310290338,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.019534237310290338,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011561508290469646,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01655147448182106,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011561508290469646,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011561508290469646,
"signal/format_reward/centered_abs_mean": 0.001324462890625,
"signal/format_reward/group_std_mean": 0.003866990143433213,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002693092543631792,
"signal/frontier_aurc_reward/group_std_mean": 0.004001007089391351,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.366365854162723e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.366365854162723e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.0502876341342926,
"signal/frontier_ece_reward/group_std_mean": 0.06829719394445419,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005028763134032488,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005028763134032488,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2349224418401718,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.31641311645507814,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023492245376110076,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023492245376110076,
"signal/volume_coverage_0/centered_abs_mean": 1.7005946095771662e-10,
"signal/volume_coverage_0/group_std_mean": 2.1498098995431648e-10,
"signal/volume_coverage_0/group_zero_std_frac": 1.0,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.7005946650883176e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.7005946650883176e-11,
"signal/volume_coverage_1/centered_abs_mean": 1.7005946095771662e-10,
"signal/volume_coverage_1/group_std_mean": 2.1498098995431648e-10,
"signal/volume_coverage_1/group_zero_std_frac": 1.0,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.7005946650883176e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.7005946650883176e-11,
"signal/volume_coverage_10/centered_abs_mean": 1.7005946095771662e-10,
"signal/volume_coverage_10/group_std_mean": 2.1498098995431648e-10,
"signal/volume_coverage_10/group_zero_std_frac": 1.0,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.7005946650883176e-11,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.7005946650883176e-11,
"signal/volume_coverage_15/centered_abs_mean": 4.253433771861381e-10,
"signal/volume_coverage_15/group_std_mean": 5.489494206423196e-10,
"signal/volume_coverage_15/group_zero_std_frac": 0.99375,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 4.253433855128108e-11,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 4.253433855128108e-11,
"signal/volume_coverage_20/centered_abs_mean": 1.1804189611885895e-09,
"signal/volume_coverage_20/group_std_mean": 1.4893079200439807e-09,
"signal/volume_coverage_20/group_zero_std_frac": 0.953125,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.1804189448821889e-10,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.1804189448821889e-10,
"signal/volume_coverage_25/centered_abs_mean": 1.6809882472101912e-09,
"signal/volume_coverage_25/group_std_mean": 2.120693264051621e-09,
"signal/volume_coverage_25/group_zero_std_frac": 0.9375,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.680988253108251e-10,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.680988253108251e-10,
"signal/volume_coverage_5/centered_abs_mean": 1.7005946095771662e-10,
"signal/volume_coverage_5/group_std_mean": 2.1498098995431648e-10,
"signal/volume_coverage_5/group_zero_std_frac": 1.0,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.7005946650883176e-11,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.7005946650883176e-11,
"step": 95
},
{
"calibration/aurc": 0.2968136628582203,
"calibration/batch_distribution_entropy": 0.9913739925787933,
"calibration/buffer_distribution_entropy": 0.9865418876336769,
"calibration/confidence_entropy": 0.5013708794629002,
"calibration/coverage@0%": 0.009375,
"calibration/coverage@1%": 0.009375,
"calibration/coverage@10%": 0.078125,
"calibration/coverage@15%": 0.24296875,
"calibration/coverage@20%": 0.37578125,
"calibration/coverage@25%": 0.4625,
"calibration/coverage@30%": 0.5640625,
"calibration/coverage@5%": 0.038671875,
"calibration/ece": 0.14873248383914847,
"calibration/mean_confidence": 0.5345915049288531,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 601.2,
"completions/max_terminated_length": 601.2,
"completions/mean_length": 159.4876953125,
"completions/mean_terminated_length": 159.58204956054686,
"completions/min_length": 35.4,
"completions/min_terminated_length": 60.6,
"epoch": 0.32,
"grad_norm": 0.0011671575484797359,
"learning_rate": 1e-06,
"loss": -0.0004,
"num_tokens": 328733402.0,
"reward": 0.918266487121582,
"reward_std": 0.09741021245718003,
"rewards/accuracy_reward": 0.5234375,
"rewards/brier_reward": 0.7532869338989258,
"rewards/confidence_uniqueness_reward": 0.9563530325889588,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.003001447068527341,
"rewards/frontier_ece_reward": 0.01955104824155569,
"rewards/frontier_entropy_batch_reward": -0.15992054343223572,
"rewards/volume_coverage_0": 5.714750343950581e-11,
"rewards/volume_coverage_1": 5.714750343950581e-11,
"rewards/volume_coverage_10": -2.0253623704384605e-10,
"rewards/volume_coverage_15": 2.330152578267769e-10,
"rewards/volume_coverage_20": 3.5749040988748704e-10,
"rewards/volume_coverage_25": 1.683893230105052e-09,
"rewards/volume_coverage_5": 5.714750343950581e-11,
"signal/accuracy_reward/centered_abs_mean": 0.09888916015625,
"signal/accuracy_reward/group_std_mean": 0.13817883729934693,
"signal/accuracy_reward/group_zero_std_frac": 0.578125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049444580078125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049444580078125,
"signal/advantage_abs_mean": 0.07216062396764755,
"signal/advantage_pre_scale_abs_mean": 0.07216062396764755,
"signal/advantage_pre_scale_std": 0.12035643607378006,
"signal/advantage_std": 0.12035643607378006,
"signal/brier_reward/centered_abs_mean": 0.1801830530166626,
"signal/brier_reward/group_std_mean": 0.22755076885223388,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01801830604672432,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01801830604672432,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012103627994656562,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01730086486786604,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012103628600016237,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012103628600016237,
"signal/format_reward/centered_abs_mean": 0.001324462890625,
"signal/format_reward/group_std_mean": 0.003866990143433213,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002509988471865654,
"signal/frontier_aurc_reward/group_std_mean": 0.0037281450815498827,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.137485618935898e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.137485618935898e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.04895166382193565,
"signal/frontier_ece_reward/group_std_mean": 0.06564311608672142,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004895166680216789,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004895166680216789,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24786655604839325,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32686212062835696,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024786657094955443,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024786657094955443,
"signal/volume_coverage_0/centered_abs_mean": 1.6123284408975368e-10,
"signal/volume_coverage_0/group_std_mean": 2.0597639774355692e-10,
"signal/volume_coverage_0/group_zero_std_frac": 1.0,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.612328473857283e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.612328473857283e-11,
"signal/volume_coverage_1/centered_abs_mean": 1.6123284408975368e-10,
"signal/volume_coverage_1/group_std_mean": 2.0597639774355692e-10,
"signal/volume_coverage_1/group_zero_std_frac": 1.0,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.612328473857283e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.612328473857283e-11,
"signal/volume_coverage_10/centered_abs_mean": 8.140090455854843e-10,
"signal/volume_coverage_10/group_std_mean": 1.0202650856105943e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.953125,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 8.140090494018759e-11,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 8.140090494018759e-11,
"signal/volume_coverage_15/centered_abs_mean": 1.4006979430492006e-09,
"signal/volume_coverage_15/group_std_mean": 1.7565225794713424e-09,
"signal/volume_coverage_15/group_zero_std_frac": 0.93125,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.4006979239672425e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.4006979239672425e-10,
"signal/volume_coverage_20/centered_abs_mean": 2.6041023913625503e-09,
"signal/volume_coverage_20/group_std_mean": 3.265086358922309e-09,
"signal/volume_coverage_20/group_zero_std_frac": 0.909375,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.604102341749459e-10,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 2.604102341749459e-10,
"signal/volume_coverage_25/centered_abs_mean": 7.579259633100355e-09,
"signal/volume_coverage_25/group_std_mean": 9.551655268807968e-09,
"signal/volume_coverage_25/group_zero_std_frac": 0.76875,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 7.579259378096004e-10,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 7.579259378096004e-10,
"signal/volume_coverage_5/centered_abs_mean": 1.6123284408975368e-10,
"signal/volume_coverage_5/group_std_mean": 2.0597639774355692e-10,
"signal/volume_coverage_5/group_zero_std_frac": 1.0,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.612328473857283e-11,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.612328473857283e-11,
"step": 100
},
{
"epoch": 0.32,
"eval_calibration/aurc": 0.5300616902547085,
"eval_calibration/batch_distribution_entropy": 0.9290254778537306,
"eval_calibration/buffer_distribution_entropy": 0.987155783499908,
"eval_calibration/confidence_entropy": 0.4952664114246258,
"eval_calibration/coverage@0%": 0.0390625,
"eval_calibration/coverage@1%": 0.0390625,
"eval_calibration/coverage@10%": 0.0390625,
"eval_calibration/coverage@15%": 0.0390625,
"eval_calibration/coverage@20%": 0.09375,
"eval_calibration/coverage@25%": 0.1484375,
"eval_calibration/coverage@30%": 0.1796875,
"eval_calibration/coverage@5%": 0.0390625,
"eval_calibration/ece": 0.22405392000125574,
"eval_calibration/mean_confidence": 0.45794091723888564,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 363.5,
"eval_completions/max_terminated_length": 363.5,
"eval_completions/mean_length": 168.7442741394043,
"eval_completions/mean_terminated_length": 168.7442741394043,
"eval_completions/min_length": 82.5,
"eval_completions/min_terminated_length": 82.5,
"eval_loss": 0.0,
"eval_num_tokens": 328733402.0,
"eval_reward": 0.7702741324901581,
"eval_reward_std": 0.2489350475370884,
"eval_rewards/accuracy_reward": 0.40625,
"eval_rewards/brier_reward": 0.75297711789608,
"eval_rewards/confidence_uniqueness_reward": 0.90283203125,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0036825715797021985,
"eval_rewards/frontier_ece_reward": 0.016142661683261395,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_rewards/volume_coverage_0": 7.314555679086097e-11,
"eval_rewards/volume_coverage_1": 7.314555679086097e-11,
"eval_rewards/volume_coverage_10": 7.314555679086097e-11,
"eval_rewards/volume_coverage_15": 1.8925829212325255e-09,
"eval_rewards/volume_coverage_20": 2.596937434193314e-09,
"eval_rewards/volume_coverage_25": 7.488784627140976e-09,
"eval_rewards/volume_coverage_5": 7.314555679086097e-11,
"eval_runtime": 19.1537,
"eval_samples_per_second": 26.105,
"eval_signal/accuracy_reward/centered_abs_mean": 0.46337890625,
"eval_signal/accuracy_reward/group_std_mean": 0.4884117320179939,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.231689453125,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.231689453125,
"eval_signal/advantage_abs_mean": 0.23422155156731606,
"eval_signal/advantage_pre_scale_abs_mean": 0.23422155156731606,
"eval_signal/advantage_pre_scale_std": 0.2462342418730259,
"eval_signal/advantage_std": 0.2462342418730259,
"eval_signal/brier_reward/centered_abs_mean": 0.21260768920183182,
"eval_signal/brier_reward/group_std_mean": 0.26391947641968727,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02126076864078641,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02126076864078641,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0369873046875,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04263218864798546,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036987304920330644,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036987304920330644,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003667147539090365,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.005491463467478752,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5839345148124266e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5839345148124266e-05,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.04754521977156401,
"eval_signal/frontier_ece_reward/group_std_mean": 0.06594326347112656,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004754522116854787,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004754522116854787,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/volume_coverage_0/centered_abs_mean": 1.6283716996329245e-10,
"eval_signal/volume_coverage_0/group_std_mean": 2.073966241800118e-10,
"eval_signal/volume_coverage_0/group_zero_std_frac": 1.0,
"eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.6283716996329245e-11,
"eval_signal/volume_coverage_0/weight": 0.10000000149011612,
"eval_signal/volume_coverage_0/weighted_centered_abs_mean": 1.6283716996329245e-11,
"eval_signal/volume_coverage_1/centered_abs_mean": 1.6283716996329245e-10,
"eval_signal/volume_coverage_1/group_std_mean": 2.073966241800118e-10,
"eval_signal/volume_coverage_1/group_zero_std_frac": 1.0,
"eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.6283716996329245e-11,
"eval_signal/volume_coverage_1/weight": 0.10000000149011612,
"eval_signal/volume_coverage_1/weighted_centered_abs_mean": 1.6283716996329245e-11,
"eval_signal/volume_coverage_10/centered_abs_mean": 1.6283716996329245e-10,
"eval_signal/volume_coverage_10/group_std_mean": 2.073966241800118e-10,
"eval_signal/volume_coverage_10/group_zero_std_frac": 1.0,
"eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.6283716996329245e-11,
"eval_signal/volume_coverage_10/weight": 0.10000000149011612,
"eval_signal/volume_coverage_10/weighted_centered_abs_mean": 1.6283716996329245e-11,
"eval_signal/volume_coverage_15/centered_abs_mean": 3.6766215688252557e-09,
"eval_signal/volume_coverage_15/group_std_mean": 4.504548767846062e-09,
"eval_signal/volume_coverage_15/group_zero_std_frac": 0.75,
"eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.6766217349250285e-10,
"eval_signal/volume_coverage_15/weight": 0.10000000149011612,
"eval_signal/volume_coverage_15/weighted_centered_abs_mean": 3.6766217349250285e-10,
"eval_signal/volume_coverage_20/centered_abs_mean": 7.212183072979883e-09,
"eval_signal/volume_coverage_20/group_std_mean": 9.087623431547343e-09,
"eval_signal/volume_coverage_20/group_zero_std_frac": 0.75,
"eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 7.212182968896474e-10,
"eval_signal/volume_coverage_20/weight": 0.10000000149011612,
"eval_signal/volume_coverage_20/weighted_centered_abs_mean": 7.212182968896474e-10,
"eval_signal/volume_coverage_25/centered_abs_mean": 2.1109896330973044e-08,
"eval_signal/volume_coverage_25/group_std_mean": 2.6170708489203776e-08,
"eval_signal/volume_coverage_25/group_zero_std_frac": 0.6875,
"eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.1109897024862434e-09,
"eval_signal/volume_coverage_25/weight": 0.10000000149011612,
"eval_signal/volume_coverage_25/weighted_centered_abs_mean": 2.1109897024862434e-09,
"eval_signal/volume_coverage_5/centered_abs_mean": 1.6283716996329245e-10,
"eval_signal/volume_coverage_5/group_std_mean": 2.073966241800118e-10,
"eval_signal/volume_coverage_5/group_zero_std_frac": 1.0,
"eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.6283716996329245e-11,
"eval_signal/volume_coverage_5/weight": 0.10000000149011612,
"eval_signal/volume_coverage_5/weighted_centered_abs_mean": 1.6283716996329245e-11,
"eval_steps_per_second": 0.209,
"step": 100
},
{
"calibration/aurc": 0.3244640631384934,
"calibration/batch_distribution_entropy": 0.9834537141058156,
"calibration/buffer_distribution_entropy": 0.9890071155622993,
"calibration/confidence_entropy": 0.5025044401941181,
"calibration/coverage@0%": 0.008994312622309198,
"calibration/coverage@1%": 0.008994312622309198,
"calibration/coverage@10%": 0.03634112035225049,
"calibration/coverage@15%": 0.04924244740704501,
"calibration/coverage@20%": 0.13952803938356165,
"calibration/coverage@25%": 0.25559488136007824,
"calibration/coverage@30%": 0.42172899339530334,
"calibration/coverage@5%": 0.008994312622309198,
"calibration/ece": 0.14414761784992106,
"calibration/mean_confidence": 0.4889607813259758,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 614.0,
"completions/max_terminated_length": 614.0,
"completions/mean_length": 166.542578125,
"completions/mean_terminated_length": 166.60614318847655,
"completions/min_length": 39.2,
"completions/min_terminated_length": 66.8,
"epoch": 0.336,
"grad_norm": 0.001344151794910431,
"learning_rate": 1e-06,
"loss": -0.0,
"num_tokens": 345161230.0,
"reward": 0.9194877028465271,
"reward_std": 0.10516626238822938,
"rewards/accuracy_reward": 0.5298828125,
"rewards/brier_reward": 0.7511946082115173,
"rewards/confidence_uniqueness_reward": 0.955797803401947,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.002879744628444314,
"rewards/frontier_ece_reward": 0.018249679170548917,
"rewards/frontier_entropy_batch_reward": -0.17697776556015016,
"rewards/volume_coverage_0": 4.766918682702626e-10,
"rewards/volume_coverage_1": 4.766918682702626e-10,
"rewards/volume_coverage_10": 2.4325521309070954e-09,
"rewards/volume_coverage_15": 3.758264648448528e-09,
"rewards/volume_coverage_20": 1.5596661688890846e-08,
"rewards/volume_coverage_25": 2.5982084825987296e-08,
"rewards/volume_coverage_5": 1.6899796385222431e-09,
"signal/accuracy_reward/centered_abs_mean": 0.11619873046875,
"signal/accuracy_reward/group_std_mean": 0.1555788427591324,
"signal/accuracy_reward/group_zero_std_frac": 0.546875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.058099365234375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.058099365234375,
"signal/advantage_abs_mean": 0.08048571348190307,
"signal/advantage_pre_scale_abs_mean": 0.08048571348190307,
"signal/advantage_pre_scale_std": 0.12967196404933928,
"signal/advantage_std": 0.12967196404933928,
"signal/brier_reward/centered_abs_mean": 0.17787760496139526,
"signal/brier_reward/group_std_mean": 0.22367975115776062,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017787761986255646,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.017787761986255646,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012248115800321101,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016838539764285086,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012248115846887231,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012248115846887231,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.002762135770171881,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024637745693325995,
"signal/frontier_aurc_reward/group_std_mean": 0.0036766203120350838,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.079718335357029e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.079718335357029e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.04419417455792427,
"signal/frontier_ece_reward/group_std_mean": 0.05992407724261284,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00441941749304533,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00441941749304533,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26389217674732207,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.340835964679718,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026389218494296075,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026389218494296075,
"signal/volume_coverage_0/centered_abs_mean": 9.67608809396281e-10,
"signal/volume_coverage_0/group_std_mean": 1.2326531816098107e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.95625,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 9.676088168555918e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 9.676088168555918e-11,
"signal/volume_coverage_1/centered_abs_mean": 9.67608809396281e-10,
"signal/volume_coverage_1/group_std_mean": 1.2326531816098107e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.95625,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 9.676088168555918e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 9.676088168555918e-11,
"signal/volume_coverage_10/centered_abs_mean": 3.821803023704162e-09,
"signal/volume_coverage_10/group_std_mean": 4.833791990982439e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.85,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.8218032254525025e-10,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 3.8218032254525025e-10,
"signal/volume_coverage_15/centered_abs_mean": 6.327961237992397e-09,
"signal/volume_coverage_15/group_std_mean": 7.959293004539125e-09,
"signal/volume_coverage_15/group_zero_std_frac": 0.85,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 6.327961339820664e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 6.327961339820664e-10,
"signal/volume_coverage_20/centered_abs_mean": 2.1903684760057884e-08,
"signal/volume_coverage_20/group_std_mean": 2.784849743239781e-08,
"signal/volume_coverage_20/group_zero_std_frac": 0.803125,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.1903685564102214e-09,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 2.1903685564102214e-09,
"signal/volume_coverage_25/centered_abs_mean": 3.603442056410167e-08,
"signal/volume_coverage_25/group_std_mean": 4.58625520782796e-08,
"signal/volume_coverage_25/group_zero_std_frac": 0.8,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.6034418525454638e-09,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 3.6034418525454638e-09,
"signal/volume_coverage_5/centered_abs_mean": 2.5051081816579446e-09,
"signal/volume_coverage_5/group_std_mean": 3.187973132007249e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.90625,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.5051084722241266e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.5051084722241266e-10,
"step": 105
},
{
"calibration/aurc": 0.36664578478043053,
"calibration/batch_distribution_entropy": 0.9767600246372599,
"calibration/buffer_distribution_entropy": 0.99403102832482,
"calibration/confidence_entropy": 0.48415713252297066,
"calibration/coverage@0%": 0.009783206947162426,
"calibration/coverage@1%": 0.009783206947162426,
"calibration/coverage@10%": 0.037166707436399216,
"calibration/coverage@15%": 0.13565924657534245,
"calibration/coverage@20%": 0.23143881482387477,
"calibration/coverage@25%": 0.35299275318003914,
"calibration/coverage@30%": 0.4241415423189824,
"calibration/coverage@5%": 0.0265991927592955,
"calibration/ece": 0.13534885462049426,
"calibration/mean_confidence": 0.45254701467405384,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 603.4,
"completions/max_terminated_length": 603.4,
"completions/mean_length": 170.66796875,
"completions/mean_terminated_length": 170.71863403320313,
"completions/min_length": 27.8,
"completions/min_terminated_length": 70.8,
"epoch": 0.352,
"grad_norm": 0.0011555871460586786,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 362169286.0,
"reward": 0.8930242538452149,
"reward_std": 0.10183399468660355,
"rewards/accuracy_reward": 0.47333984375,
"rewards/brier_reward": 0.7553327441215515,
"rewards/confidence_uniqueness_reward": 0.9558841824531555,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0032868479378521443,
"rewards/frontier_ece_reward": 0.01634308509528637,
"rewards/frontier_entropy_batch_reward": -0.16214127838611603,
"rewards/volume_coverage_0": 8.19065734767449e-10,
"rewards/volume_coverage_1": 8.19065734767449e-10,
"rewards/volume_coverage_10": 1.2110763365248688e-09,
"rewards/volume_coverage_15": 2.4374769481451397e-09,
"rewards/volume_coverage_20": 1.8794657538623305e-08,
"rewards/volume_coverage_25": 3.4976089935412345e-08,
"rewards/volume_coverage_5": 1.1241983650413234e-09,
"signal/accuracy_reward/centered_abs_mean": 0.115557861328125,
"signal/accuracy_reward/group_std_mean": 0.1505853056907654,
"signal/accuracy_reward/group_zero_std_frac": 0.575,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0577789306640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0577789306640625,
"signal/advantage_abs_mean": 0.07932186275720596,
"signal/advantage_pre_scale_abs_mean": 0.07932186275720596,
"signal/advantage_pre_scale_std": 0.12750938385725022,
"signal/advantage_std": 0.12750938385725022,
"signal/brier_reward/centered_abs_mean": 0.1764029860496521,
"signal/brier_reward/group_std_mean": 0.22145257592201234,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01764029860496521,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01764029860496521,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012107652239501476,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016175054758787156,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001210765284486115,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001210765284486115,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027799428906291725,
"signal/frontier_aurc_reward/group_std_mean": 0.004139097221195698,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.474928635114338e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.474928635114338e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.04073281139135361,
"signal/frontier_ece_reward/group_std_mean": 0.05458591654896736,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004073281120508909,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004073281120508909,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2442895472049713,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3224704086780548,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02442895546555519,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02442895546555519,
"signal/volume_coverage_0/centered_abs_mean": 1.2662086726344945e-09,
"signal/volume_coverage_0/group_std_mean": 1.5743466100226478e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.95,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.2662086505167702e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.2662086505167702e-10,
"signal/volume_coverage_1/centered_abs_mean": 1.2662086726344945e-09,
"signal/volume_coverage_1/group_std_mean": 1.5743466100226478e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.95,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.2662086505167702e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.2662086505167702e-10,
"signal/volume_coverage_10/centered_abs_mean": 1.9809725651520616e-09,
"signal/volume_coverage_10/group_std_mean": 2.462229799216065e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.95,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.9809724542164953e-10,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.9809724542164953e-10,
"signal/volume_coverage_15/centered_abs_mean": 3.419152831696248e-09,
"signal/volume_coverage_15/group_std_mean": 4.25322423253105e-09,
"signal/volume_coverage_15/group_zero_std_frac": 0.89375,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.4191526680250885e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 3.4191526680250885e-10,
"signal/volume_coverage_20/centered_abs_mean": 1.8356375885986152e-08,
"signal/volume_coverage_20/group_std_mean": 2.2897061674465035e-08,
"signal/volume_coverage_20/group_zero_std_frac": 0.759375,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.835637512825894e-09,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.835637512825894e-09,
"signal/volume_coverage_25/centered_abs_mean": 3.538144828230117e-08,
"signal/volume_coverage_25/group_std_mean": 4.4106969121493475e-08,
"signal/volume_coverage_25/group_zero_std_frac": 0.7,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.5381449908777896e-09,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 3.5381449908777896e-09,
"signal/volume_coverage_5/centered_abs_mean": 1.8225652334558085e-09,
"signal/volume_coverage_5/group_std_mean": 2.2654556466883415e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.95,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.8225652779514656e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.8225652779514656e-10,
"step": 110
},
{
"calibration/aurc": 0.3922542259570845,
"calibration/batch_distribution_entropy": 0.9862624023754109,
"calibration/buffer_distribution_entropy": 0.99757564403655,
"calibration/confidence_entropy": 0.4880892356336729,
"calibration/coverage@0%": 0.004296875,
"calibration/coverage@1%": 0.004296875,
"calibration/coverage@10%": 0.004296875,
"calibration/coverage@15%": 0.008203125,
"calibration/coverage@20%": 0.18046875,
"calibration/coverage@25%": 0.243359375,
"calibration/coverage@30%": 0.3765625,
"calibration/coverage@5%": 0.004296875,
"calibration/ece": 0.16619304690195144,
"calibration/mean_confidence": 0.4991101124115054,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 498.2,
"completions/max_terminated_length": 498.2,
"completions/mean_length": 172.508984375,
"completions/mean_terminated_length": 172.5423095703125,
"completions/min_length": 32.4,
"completions/min_terminated_length": 58.4,
"epoch": 0.368,
"grad_norm": 0.00148635427467525,
"learning_rate": 1e-06,
"loss": -0.0003,
"num_tokens": 379001250.0,
"reward": 0.8987860441207886,
"reward_std": 0.10172230154275894,
"rewards/accuracy_reward": 0.48759765625,
"rewards/brier_reward": 0.7556616544723511,
"rewards/confidence_uniqueness_reward": 0.956497323513031,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.003505715122446418,
"rewards/frontier_ece_reward": 0.015234233625233174,
"rewards/frontier_entropy_batch_reward": -0.1756180554628372,
"rewards/volume_coverage_0": 9.202538037156315e-13,
"rewards/volume_coverage_1": 9.202538037156315e-13,
"rewards/volume_coverage_10": -2.2965750828829455e-12,
"rewards/volume_coverage_15": 9.785381289983963e-11,
"rewards/volume_coverage_20": 3.1048119630838755e-10,
"rewards/volume_coverage_25": 6.026713239570825e-10,
"rewards/volume_coverage_5": 9.202538037156315e-13,
"signal/accuracy_reward/centered_abs_mean": 0.110748291015625,
"signal/accuracy_reward/group_std_mean": 0.15012845695018767,
"signal/accuracy_reward/group_zero_std_frac": 0.55625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0553741455078125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0553741455078125,
"signal/advantage_abs_mean": 0.07711823582649231,
"signal/advantage_pre_scale_abs_mean": 0.07711823582649231,
"signal/advantage_pre_scale_std": 0.12509591430425643,
"signal/advantage_std": 0.12509591430425643,
"signal/brier_reward/centered_abs_mean": 0.1711456745862961,
"signal/brier_reward/group_std_mean": 0.21479713320732116,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017114568129181863,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.017114568129181863,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012594586797058582,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016726733930408955,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012594586703926324,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012594586703926324,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003213394992053509,
"signal/frontier_aurc_reward/group_std_mean": 0.004921545553952455,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.016743769170716e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.016743769170716e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.03740856498479843,
"signal/frontier_ece_reward/group_std_mean": 0.049930807948112485,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0037408565636724233,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0037408565636724233,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2576330900192261,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3383022129535675,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025763309746980666,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025763309746980666,
"signal/volume_coverage_0/centered_abs_mean": 9.8125767233892e-11,
"signal/volume_coverage_0/group_std_mean": 1.2552429720646962e-10,
"signal/volume_coverage_0/group_zero_std_frac": 1.0,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 9.812575838680226e-12,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 9.812575838680226e-12,
"signal/volume_coverage_1/centered_abs_mean": 9.8125767233892e-11,
"signal/volume_coverage_1/group_std_mean": 1.2552429720646962e-10,
"signal/volume_coverage_1/group_zero_std_frac": 1.0,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 9.812575838680226e-12,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 9.812575838680226e-12,
"signal/volume_coverage_10/centered_abs_mean": 4.5797430708871544e-10,
"signal/volume_coverage_10/group_std_mean": 5.857822844845817e-10,
"signal/volume_coverage_10/group_zero_std_frac": 0.965625,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.579742843638379e-11,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 4.579742843638379e-11,
"signal/volume_coverage_15/centered_abs_mean": 1.214225502088606e-09,
"signal/volume_coverage_15/group_std_mean": 1.5410452643138672e-09,
"signal/volume_coverage_15/group_zero_std_frac": 0.925,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.214225457159268e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.214225457159268e-10,
"signal/volume_coverage_20/centered_abs_mean": 1.8293037196137618e-09,
"signal/volume_coverage_20/group_std_mean": 2.3235737112159425e-09,
"signal/volume_coverage_20/group_zero_std_frac": 0.9125,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.8293037154504254e-10,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.8293037154504254e-10,
"signal/volume_coverage_25/centered_abs_mean": 2.8839780452649675e-09,
"signal/volume_coverage_25/group_std_mean": 3.655950098213623e-09,
"signal/volume_coverage_25/group_zero_std_frac": 0.8875,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.8839779661615774e-10,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 2.8839779661615774e-10,
"signal/volume_coverage_5/centered_abs_mean": 9.8125767233892e-11,
"signal/volume_coverage_5/group_std_mean": 1.2552429720646962e-10,
"signal/volume_coverage_5/group_zero_std_frac": 1.0,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 9.812575838680226e-12,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 9.812575838680226e-12,
"step": 115
},
{
"calibration/aurc": 0.3513991983148937,
"calibration/batch_distribution_entropy": 0.9860781630180767,
"calibration/buffer_distribution_entropy": 0.998864146535581,
"calibration/confidence_entropy": 0.4976471261852248,
"calibration/coverage@0%": 0.022265625,
"calibration/coverage@1%": 0.022265625,
"calibration/coverage@10%": 0.072265625,
"calibration/coverage@15%": 0.182421875,
"calibration/coverage@20%": 0.242578125,
"calibration/coverage@25%": 0.31484375,
"calibration/coverage@30%": 0.3953125,
"calibration/coverage@5%": 0.044140625,
"calibration/ece": 0.14944111182549608,
"calibration/mean_confidence": 0.46471985469835875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 627.0,
"completions/max_terminated_length": 627.0,
"completions/mean_length": 171.5482421875,
"completions/mean_terminated_length": 171.61572875976563,
"completions/min_length": 27.0,
"completions/min_terminated_length": 68.6,
"epoch": 0.384,
"grad_norm": 0.0012787673622369766,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 395614416.0,
"reward": 0.9182936072349548,
"reward_std": 0.10047120004892349,
"rewards/accuracy_reward": 0.52353515625,
"rewards/brier_reward": 0.7724483251571655,
"rewards/confidence_uniqueness_reward": 0.9581897497177124,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0029781535267829895,
"rewards/frontier_ece_reward": 0.01786994356662035,
"rewards/frontier_entropy_batch_reward": -0.18092238306999206,
"rewards/volume_coverage_0": 4.383281107869785e-11,
"rewards/volume_coverage_1": 4.383281107869785e-11,
"rewards/volume_coverage_10": 4.383281107869785e-11,
"rewards/volume_coverage_15": 8.713083666025967e-11,
"rewards/volume_coverage_20": 4.434830815948365e-10,
"rewards/volume_coverage_25": 6.633161402959387e-10,
"rewards/volume_coverage_5": 4.383281107869785e-11,
"signal/accuracy_reward/centered_abs_mean": 0.112371826171875,
"signal/accuracy_reward/group_std_mean": 0.14683832228183746,
"signal/accuracy_reward/group_zero_std_frac": 0.584375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0561859130859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0561859130859375,
"signal/advantage_abs_mean": 0.07782022804021835,
"signal/advantage_pre_scale_abs_mean": 0.07782022804021835,
"signal/advantage_pre_scale_std": 0.12562936544418335,
"signal/advantage_std": 0.12562936544418335,
"signal/brier_reward/centered_abs_mean": 0.1601964920759201,
"signal/brier_reward/group_std_mean": 0.2036239355802536,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016019649058580398,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016019649058580398,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012591696158051491,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0170934084802866,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001259169657714665,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001259169657714665,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0033347518648952244,
"signal/frontier_aurc_reward/group_std_mean": 0.0051669498905539514,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.168439918430522e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.168439918430522e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.034313207119703294,
"signal/frontier_ece_reward/group_std_mean": 0.04498266875743866,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003431320795789361,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003431320795789361,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2615587115287781,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3394892454147339,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026155871525406837,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026155871525406837,
"signal/volume_coverage_0/centered_abs_mean": 1.3037857460318492e-10,
"signal/volume_coverage_0/group_std_mean": 1.6798773205817242e-10,
"signal/volume_coverage_0/group_zero_std_frac": 1.0,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.3037858639930455e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.3037858639930455e-11,
"signal/volume_coverage_1/centered_abs_mean": 1.3037857460318492e-10,
"signal/volume_coverage_1/group_std_mean": 1.6798773205817242e-10,
"signal/volume_coverage_1/group_zero_std_frac": 1.0,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.3037858639930455e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.3037858639930455e-11,
"signal/volume_coverage_10/centered_abs_mean": 1.3037857460318492e-10,
"signal/volume_coverage_10/group_std_mean": 1.6798773205817242e-10,
"signal/volume_coverage_10/group_zero_std_frac": 1.0,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.3037858639930455e-11,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.3037858639930455e-11,
"signal/volume_coverage_15/centered_abs_mean": 3.371282447650614e-10,
"signal/volume_coverage_15/group_std_mean": 4.31999752636969e-10,
"signal/volume_coverage_15/group_zero_std_frac": 1.0,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.371282558672917e-11,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 3.371282558672917e-11,
"signal/volume_coverage_20/centered_abs_mean": 1.909953942225151e-09,
"signal/volume_coverage_20/group_std_mean": 2.4386795027808716e-09,
"signal/volume_coverage_20/group_zero_std_frac": 0.953125,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.9099540005118598e-10,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.9099540005118598e-10,
"signal/volume_coverage_25/centered_abs_mean": 3.69576863834542e-09,
"signal/volume_coverage_25/group_std_mean": 4.7769367295114055e-09,
"signal/volume_coverage_25/group_zero_std_frac": 0.903125,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.69576855924203e-10,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 3.69576855924203e-10,
"signal/volume_coverage_5/centered_abs_mean": 1.3037857460318492e-10,
"signal/volume_coverage_5/group_std_mean": 1.6798773205817242e-10,
"signal/volume_coverage_5/group_zero_std_frac": 1.0,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.3037858639930455e-11,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.3037858639930455e-11,
"step": 120
},
{
"calibration/aurc": 0.4437287155933821,
"calibration/batch_distribution_entropy": 0.9884368812661011,
"calibration/buffer_distribution_entropy": 0.9989521601251751,
"calibration/confidence_entropy": 0.5128253874437341,
"calibration/coverage@0%": 0.002740502450980392,
"calibration/coverage@1%": 0.002740502450980392,
"calibration/coverage@10%": 0.005865502450980392,
"calibration/coverage@15%": 0.005865502450980392,
"calibration/coverage@20%": 0.007037377450980392,
"calibration/coverage@25%": 0.046881127450980394,
"calibration/coverage@30%": 0.13672947303921568,
"calibration/coverage@5%": 0.002740502450980392,
"calibration/ece": 0.15523985953050676,
"calibration/mean_confidence": 0.5022496927820296,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 533.0,
"completions/max_terminated_length": 533.0,
"completions/mean_length": 167.06259765625,
"completions/mean_terminated_length": 167.11257629394532,
"completions/min_length": 43.8,
"completions/min_terminated_length": 69.2,
"epoch": 0.4,
"grad_norm": 0.001277610776014626,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 412361585.0,
"reward": 0.9021111249923706,
"reward_std": 0.10790342837572098,
"rewards/accuracy_reward": 0.49384765625,
"rewards/brier_reward": 0.7573448777198791,
"rewards/confidence_uniqueness_reward": 0.9609808683395386,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0036756881512701512,
"rewards/frontier_ece_reward": 0.013616420142352582,
"rewards/frontier_entropy_batch_reward": -0.17814513444900512,
"rewards/volume_coverage_0": 1.0085748902710634e-10,
"rewards/volume_coverage_1": 1.0085748902710634e-10,
"rewards/volume_coverage_10": 6.06503069988662e-11,
"rewards/volume_coverage_15": -2.9825425462703946e-11,
"rewards/volume_coverage_20": 8.66519167530555e-10,
"rewards/volume_coverage_25": 6.701753374471764e-10,
"rewards/volume_coverage_5": 1.0085748902710634e-10,
"signal/accuracy_reward/centered_abs_mean": 0.126629638671875,
"signal/accuracy_reward/group_std_mean": 0.1655414193868637,
"signal/accuracy_reward/group_zero_std_frac": 0.525,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0633148193359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0633148193359375,
"signal/advantage_abs_mean": 0.0843367651104927,
"signal/advantage_pre_scale_abs_mean": 0.0843367651104927,
"signal/advantage_pre_scale_std": 0.13356164544820787,
"signal/advantage_std": 0.13356164544820787,
"signal/brier_reward/centered_abs_mean": 0.16698363721370696,
"signal/brier_reward/group_std_mean": 0.21023752689361572,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01669836454093456,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01669836454093456,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012285609915852546,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015908705443143843,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012285609962418675,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012285609962418675,
"signal/format_reward/centered_abs_mean": 0.000555419921875,
"signal/format_reward/group_std_mean": 0.0013209730386734009,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003910475643351674,
"signal/frontier_aurc_reward/group_std_mean": 0.00609401436522603,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.888094772468321e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.888094772468321e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.03086501657962799,
"signal/frontier_ece_reward/group_std_mean": 0.0409327894449234,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003086501592770219,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003086501592770219,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26057218909263613,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3405936896800995,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026057218760252,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026057218760252,
"signal/volume_coverage_0/centered_abs_mean": 8.354765185236701e-10,
"signal/volume_coverage_0/group_std_mean": 1.0910699621557996e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.965625,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.354765455853563e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 8.354765455853563e-11,
"signal/volume_coverage_1/centered_abs_mean": 8.354765185236701e-10,
"signal/volume_coverage_1/group_std_mean": 1.0910699621557996e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.965625,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.354765455853563e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 8.354765455853563e-11,
"signal/volume_coverage_10/centered_abs_mean": 9.910879533725493e-10,
"signal/volume_coverage_10/group_std_mean": 1.2953009054239572e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.959375,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 9.910879582297749e-11,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 9.910879582297749e-11,
"signal/volume_coverage_15/centered_abs_mean": 2.1594268345692667e-09,
"signal/volume_coverage_15/group_std_mean": 2.8284628694752454e-09,
"signal/volume_coverage_15/group_zero_std_frac": 0.925,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.1594270364910796e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.1594270364910796e-10,
"signal/volume_coverage_20/centered_abs_mean": 3.882546484312854e-09,
"signal/volume_coverage_20/group_std_mean": 5.0664275552669835e-09,
"signal/volume_coverage_20/group_zero_std_frac": 0.875,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.8825464718228453e-10,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 3.8825464718228453e-10,
"signal/volume_coverage_25/centered_abs_mean": 1.0074538070448113e-08,
"signal/volume_coverage_25/group_std_mean": 1.3182782165888085e-08,
"signal/volume_coverage_25/group_zero_std_frac": 0.85,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.0074538168980408e-09,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.0074538168980408e-09,
"signal/volume_coverage_5/centered_abs_mean": 8.354765185236701e-10,
"signal/volume_coverage_5/group_std_mean": 1.0910699621557996e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.965625,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 8.354765455853563e-11,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 8.354765455853563e-11,
"step": 125
},
{
"calibration/aurc": 0.35445632284315787,
"calibration/batch_distribution_entropy": 0.9813028422811737,
"calibration/buffer_distribution_entropy": 0.9989037529411805,
"calibration/confidence_entropy": 0.5254088005465698,
"calibration/coverage@0%": 0.00078125,
"calibration/coverage@1%": 0.00078125,
"calibration/coverage@10%": 0.005078125,
"calibration/coverage@15%": 0.0140625,
"calibration/coverage@20%": 0.07722249508840864,
"calibration/coverage@25%": 0.16574088285854618,
"calibration/coverage@30%": 0.2815293467583497,
"calibration/coverage@5%": 0.00078125,
"calibration/ece": 0.09861364113925782,
"calibration/mean_confidence": 0.5204927474029243,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 465.2,
"completions/max_terminated_length": 465.2,
"completions/mean_length": 166.2703125,
"completions/mean_terminated_length": 166.31866149902345,
"completions/min_length": 38.2,
"completions/min_terminated_length": 63.6,
"epoch": 0.416,
"grad_norm": 0.0013528935378417373,
"learning_rate": 1e-06,
"loss": -0.0,
"num_tokens": 428945377.0,
"reward": 0.9096973180770874,
"reward_std": 0.1019922986626625,
"rewards/accuracy_reward": 0.5060546875,
"rewards/brier_reward": 0.7639304637908936,
"rewards/confidence_uniqueness_reward": 0.96273832321167,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.003395315073430538,
"rewards/frontier_ece_reward": 0.013873597048223019,
"rewards/frontier_entropy_batch_reward": -0.17146520018577577,
"rewards/volume_coverage_0": 7.473216906417335e-11,
"rewards/volume_coverage_1": 7.473216906417335e-11,
"rewards/volume_coverage_10": 7.473216906417335e-11,
"rewards/volume_coverage_15": 7.473216906417335e-11,
"rewards/volume_coverage_20": 7.026016463074214e-10,
"rewards/volume_coverage_25": 1.0181003540221667e-09,
"rewards/volume_coverage_5": 7.473216906417335e-11,
"signal/accuracy_reward/centered_abs_mean": 0.11749267578125,
"signal/accuracy_reward/group_std_mean": 0.15149664878845215,
"signal/accuracy_reward/group_zero_std_frac": 0.578125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.058746337890625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.058746337890625,
"signal/advantage_abs_mean": 0.07992852181196212,
"signal/advantage_pre_scale_abs_mean": 0.07992852181196212,
"signal/advantage_pre_scale_std": 0.1282731533050537,
"signal/advantage_std": 0.1282731533050537,
"signal/brier_reward/centered_abs_mean": 0.1657171666622162,
"signal/brier_reward/group_std_mean": 0.20892676711082458,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016571716964244844,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016571716964244844,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012295385263860226,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016405154950916767,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001229538512416184,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001229538512416184,
"signal/format_reward/centered_abs_mean": 0.00074462890625,
"signal/format_reward/group_std_mean": 0.0018734002020210027,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003926783287897706,
"signal/frontier_aurc_reward/group_std_mean": 0.0064892381429672245,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.908479022560641e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.908479022560641e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.02886553555727005,
"signal/frontier_ece_reward/group_std_mean": 0.03819820955395699,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0028865536209195853,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0028865536209195853,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25341747999191283,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3332586348056793,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02534174807369709,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02534174807369709,
"signal/volume_coverage_0/centered_abs_mean": 1.8584461053405832e-10,
"signal/volume_coverage_0/group_std_mean": 2.3160315193448966e-10,
"signal/volume_coverage_0/group_zero_std_frac": 1.0,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.858446053298879e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.858446053298879e-11,
"signal/volume_coverage_1/centered_abs_mean": 1.8584461053405832e-10,
"signal/volume_coverage_1/group_std_mean": 2.3160315193448966e-10,
"signal/volume_coverage_1/group_zero_std_frac": 1.0,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.858446053298879e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.858446053298879e-11,
"signal/volume_coverage_10/centered_abs_mean": 1.8584461053405832e-10,
"signal/volume_coverage_10/group_std_mean": 2.3160315193448966e-10,
"signal/volume_coverage_10/group_zero_std_frac": 1.0,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.858446053298879e-11,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.858446053298879e-11,
"signal/volume_coverage_15/centered_abs_mean": 1.8584461053405832e-10,
"signal/volume_coverage_15/group_std_mean": 2.3160315193448966e-10,
"signal/volume_coverage_15/group_zero_std_frac": 1.0,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.858446053298879e-11,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.858446053298879e-11,
"signal/volume_coverage_20/centered_abs_mean": 1.476313882253777e-09,
"signal/volume_coverage_20/group_std_mean": 1.8577961946597554e-09,
"signal/volume_coverage_20/group_zero_std_frac": 0.96875,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.4763138836415557e-10,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.4763138836415557e-10,
"signal/volume_coverage_25/centered_abs_mean": 2.8416659803021104e-09,
"signal/volume_coverage_25/group_std_mean": 3.5887031346604203e-09,
"signal/volume_coverage_25/group_zero_std_frac": 0.896875,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.8416659927921194e-10,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 2.8416659927921194e-10,
"signal/volume_coverage_5/centered_abs_mean": 1.8584461053405832e-10,
"signal/volume_coverage_5/group_std_mean": 2.3160315193448966e-10,
"signal/volume_coverage_5/group_zero_std_frac": 1.0,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.858446053298879e-11,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.858446053298879e-11,
"step": 130
},
{
"calibration/aurc": 0.2953629301468977,
"calibration/batch_distribution_entropy": 0.9862223812365766,
"calibration/buffer_distribution_entropy": 0.9991731870187508,
"calibration/confidence_entropy": 0.4962810437639968,
"calibration/coverage@0%": 0.00546875,
"calibration/coverage@1%": 0.00546875,
"calibration/coverage@10%": 0.0664528803816047,
"calibration/coverage@15%": 0.11689930895303327,
"calibration/coverage@20%": 0.30368838674168297,
"calibration/coverage@25%": 0.3994855369373777,
"calibration/coverage@30%": 0.4749120902641879,
"calibration/coverage@5%": 0.00546875,
"calibration/ece": 0.11536401207599001,
"calibration/mean_confidence": 0.5435637005194328,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 522.8,
"completions/max_terminated_length": 522.8,
"completions/mean_length": 164.665234375,
"completions/mean_terminated_length": 164.7296905517578,
"completions/min_length": 14.6,
"completions/min_terminated_length": 68.0,
"epoch": 0.432,
"grad_norm": 0.001812662580050528,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 445645885.0,
"reward": 0.9282691955566407,
"reward_std": 0.09776676595211028,
"rewards/accuracy_reward": 0.5431640625,
"rewards/brier_reward": 0.7781499981880188,
"rewards/confidence_uniqueness_reward": 0.9612111330032349,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.002956994064152241,
"rewards/frontier_ece_reward": 0.016237000189721584,
"rewards/frontier_entropy_batch_reward": -0.1859154611825943,
"rewards/volume_coverage_0": 1.439715041628986e-10,
"rewards/volume_coverage_1": 1.439715041628986e-10,
"rewards/volume_coverage_10": 3.679456433980377e-10,
"rewards/volume_coverage_15": 5.91844750985615e-10,
"rewards/volume_coverage_20": 1.4051429186867637e-09,
"rewards/volume_coverage_25": 6.444645928027626e-09,
"rewards/volume_coverage_5": 1.439715041628986e-10,
"signal/accuracy_reward/centered_abs_mean": 0.10758056640625,
"signal/accuracy_reward/group_std_mean": 0.1392093226313591,
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.053790283203125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.053790283203125,
"signal/advantage_abs_mean": 0.07623622268438339,
"signal/advantage_pre_scale_abs_mean": 0.07623622268438339,
"signal/advantage_pre_scale_std": 0.12360891848802566,
"signal/advantage_std": 0.12360891848802566,
"signal/brier_reward/centered_abs_mean": 0.15707127153873443,
"signal/brier_reward/group_std_mean": 0.19963068664073944,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01570712644606829,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01570712644606829,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012181778438389301,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01665004901587963,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001218177890405059,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001218177890405059,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0038814083207398655,
"signal/frontier_aurc_reward/group_std_mean": 0.006444942206144333,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8517604591324924e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8517604591324924e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.027643512561917306,
"signal/frontier_ece_reward/group_std_mean": 0.036360897868871686,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002764351200312376,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002764351200312376,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26680874824523926,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3425988554954529,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026680874824523925,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026680874824523925,
"signal/volume_coverage_0/centered_abs_mean": 3.116232355493409e-10,
"signal/volume_coverage_0/group_std_mean": 3.9976172094835506e-10,
"signal/volume_coverage_0/group_zero_std_frac": 1.0,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 3.116232358962856e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 3.116232358962856e-11,
"signal/volume_coverage_1/centered_abs_mean": 3.116232355493409e-10,
"signal/volume_coverage_1/group_std_mean": 3.9976172094835506e-10,
"signal/volume_coverage_1/group_zero_std_frac": 1.0,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 3.116232358962856e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 3.116232358962856e-11,
"signal/volume_coverage_10/centered_abs_mean": 8.034230308817669e-10,
"signal/volume_coverage_10/group_std_mean": 1.0218955376339877e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.96875,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 8.034230242898177e-11,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 8.034230242898177e-11,
"signal/volume_coverage_15/centered_abs_mean": 1.9115799082536356e-09,
"signal/volume_coverage_15/group_std_mean": 2.4280549376642924e-09,
"signal/volume_coverage_15/group_zero_std_frac": 0.93125,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.9115798128438444e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.9115798128438444e-10,
"signal/volume_coverage_20/centered_abs_mean": 3.9429249309996806e-09,
"signal/volume_coverage_20/group_std_mean": 5.0118225125572735e-09,
"signal/volume_coverage_20/group_zero_std_frac": 0.828125,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.942925042021983e-10,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 3.942925042021983e-10,
"signal/volume_coverage_25/centered_abs_mean": 1.0928629468054396e-08,
"signal/volume_coverage_25/group_std_mean": 1.3933071763005955e-08,
"signal/volume_coverage_25/group_zero_std_frac": 0.6125,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.0928629334827632e-09,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.0928629334827632e-09,
"signal/volume_coverage_5/centered_abs_mean": 3.116232355493409e-10,
"signal/volume_coverage_5/group_std_mean": 3.9976172094835506e-10,
"signal/volume_coverage_5/group_zero_std_frac": 1.0,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 3.116232358962856e-11,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 3.116232358962856e-11,
"step": 135
},
{
"calibration/aurc": 0.30513196272360465,
"calibration/batch_distribution_entropy": 0.9789796952184366,
"calibration/buffer_distribution_entropy": 0.9992924953685062,
"calibration/confidence_entropy": 0.5081590689078211,
"calibration/coverage@0%": 0.001953125,
"calibration/coverage@1%": 0.001953125,
"calibration/coverage@10%": 0.036328125,
"calibration/coverage@15%": 0.1,
"calibration/coverage@20%": 0.208984375,
"calibration/coverage@25%": 0.295703125,
"calibration/coverage@30%": 0.52109375,
"calibration/coverage@5%": 0.001953125,
"calibration/ece": 0.12843204989485463,
"calibration/mean_confidence": 0.5509481067455588,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 590.2,
"completions/max_terminated_length": 590.2,
"completions/mean_length": 170.64814453125,
"completions/mean_terminated_length": 170.64814453125,
"completions/min_length": 63.6,
"completions/min_terminated_length": 63.6,
"epoch": 0.448,
"grad_norm": 0.001530295587144792,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 462346122.0,
"reward": 0.9137397766113281,
"reward_std": 0.09540319591760635,
"rewards/accuracy_reward": 0.51552734375,
"rewards/brier_reward": 0.770829725265503,
"rewards/confidence_uniqueness_reward": 0.9601699829101562,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0034271135926246645,
"rewards/frontier_ece_reward": 0.012508058547973632,
"rewards/frontier_entropy_batch_reward": -0.18331859707832338,
"rewards/volume_coverage_0": 1.893049966783522e-10,
"rewards/volume_coverage_1": 1.893049966783522e-10,
"rewards/volume_coverage_10": 1.893049966783522e-10,
"rewards/volume_coverage_15": 2.776873092580345e-10,
"rewards/volume_coverage_20": 1.1103639230292117e-09,
"rewards/volume_coverage_25": 1.2780476343898783e-08,
"rewards/volume_coverage_5": 1.893049966783522e-10,
"signal/accuracy_reward/centered_abs_mean": 0.106903076171875,
"signal/accuracy_reward/group_std_mean": 0.1370233952999115,
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0534515380859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0534515380859375,
"signal/advantage_abs_mean": 0.07518869191408158,
"signal/advantage_pre_scale_abs_mean": 0.07518869191408158,
"signal/advantage_pre_scale_std": 0.1213814526796341,
"signal/advantage_std": 0.1213814526796341,
"signal/brier_reward/centered_abs_mean": 0.16107324361801148,
"signal/brier_reward/group_std_mean": 0.20332952439785004,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016107324883341788,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016107324883341788,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011767148971557617,
"signal/confidence_uniqueness_reward/group_std_mean": 0.014688951708376408,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001176714920438826,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001176714920438826,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0038240184541791676,
"signal/frontier_aurc_reward/group_std_mean": 0.006383162178099156,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.780023155035451e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.780023155035451e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.02547091469168663,
"signal/frontier_ece_reward/group_std_mean": 0.03375823795795441,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002547091618180275,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002547091618180275,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2633515000343323,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3400727391242981,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026335151121020316,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026335151121020316,
"signal/volume_coverage_0/centered_abs_mean": 6.843173694370819e-10,
"signal/volume_coverage_0/group_std_mean": 8.689262309680146e-10,
"signal/volume_coverage_0/group_zero_std_frac": 1.0,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.843173597226304e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 6.843173597226304e-11,
"signal/volume_coverage_1/centered_abs_mean": 6.843173694370819e-10,
"signal/volume_coverage_1/group_std_mean": 8.689262309680146e-10,
"signal/volume_coverage_1/group_zero_std_frac": 1.0,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.843173597226304e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 6.843173597226304e-11,
"signal/volume_coverage_10/centered_abs_mean": 6.843173694370819e-10,
"signal/volume_coverage_10/group_std_mean": 8.689262309680146e-10,
"signal/volume_coverage_10/group_zero_std_frac": 1.0,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 6.843173597226304e-11,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 6.843173597226304e-11,
"signal/volume_coverage_15/centered_abs_mean": 1.210085631697666e-09,
"signal/volume_coverage_15/group_std_mean": 1.518687620105652e-09,
"signal/volume_coverage_15/group_zero_std_frac": 0.984375,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.2100856011665328e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.2100856011665328e-10,
"signal/volume_coverage_20/centered_abs_mean": 4.973534961649761e-09,
"signal/volume_coverage_20/group_std_mean": 6.1977475152019675e-09,
"signal/volume_coverage_20/group_zero_std_frac": 0.884375,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 4.97353498662978e-10,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 4.97353498662978e-10,
"signal/volume_coverage_25/centered_abs_mean": 2.4731123815513456e-08,
"signal/volume_coverage_25/group_std_mean": 3.0804845252419e-08,
"signal/volume_coverage_25/group_zero_std_frac": 0.55,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.473112403755806e-09,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 2.473112403755806e-09,
"signal/volume_coverage_5/centered_abs_mean": 6.843173694370819e-10,
"signal/volume_coverage_5/group_std_mean": 8.689262309680146e-10,
"signal/volume_coverage_5/group_zero_std_frac": 1.0,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 6.843173597226304e-11,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 6.843173597226304e-11,
"step": 140
},
{
"calibration/aurc": 0.4182242037120104,
"calibration/batch_distribution_entropy": 0.9825868175253107,
"calibration/buffer_distribution_entropy": 0.9992167885447563,
"calibration/confidence_entropy": 0.528219422297434,
"calibration/coverage@0%": 0.001171875,
"calibration/coverage@1%": 0.001171875,
"calibration/coverage@10%": 0.001171875,
"calibration/coverage@15%": 0.002734375,
"calibration/coverage@20%": 0.07429519324853229,
"calibration/coverage@25%": 0.11929504036203523,
"calibration/coverage@30%": 0.24838169642857144,
"calibration/coverage@5%": 0.001171875,
"calibration/ece": 0.13537476420506195,
"calibration/mean_confidence": 0.48931586147542616,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 549.2,
"completions/max_terminated_length": 549.2,
"completions/mean_length": 173.40908203125,
"completions/mean_terminated_length": 173.5105773925781,
"completions/min_length": 16.0,
"completions/min_terminated_length": 72.8,
"epoch": 0.464,
"grad_norm": 0.0009655957692302763,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 479292647.0,
"reward": 0.8837283730506897,
"reward_std": 0.09050512313842773,
"rewards/accuracy_reward": 0.460546875,
"rewards/brier_reward": 0.7583501458168029,
"rewards/confidence_uniqueness_reward": 0.957415759563446,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.003975970856845379,
"rewards/frontier_ece_reward": 0.009838496148586274,
"rewards/frontier_entropy_batch_reward": -0.1871403008699417,
"rewards/volume_coverage_0": 3.040359616846011e-10,
"rewards/volume_coverage_1": 3.040359616846011e-10,
"rewards/volume_coverage_10": 9.855517194012898e-10,
"rewards/volume_coverage_15": 2.2260005616558944e-09,
"rewards/volume_coverage_20": 4.769506212198848e-09,
"rewards/volume_coverage_25": 2.9589145800201777e-08,
"rewards/volume_coverage_5": 3.040359616846011e-10,
"signal/accuracy_reward/centered_abs_mean": 0.08966064453125,
"signal/accuracy_reward/group_std_mean": 0.12274214625358582,
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044830322265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044830322265625,
"signal/advantage_abs_mean": 0.06799145862460136,
"signal/advantage_pre_scale_abs_mean": 0.06799145862460136,
"signal/advantage_pre_scale_std": 0.11409917026758194,
"signal/advantage_std": 0.11409917026758194,
"signal/brier_reward/centered_abs_mean": 0.15883066058158873,
"signal/brier_reward/group_std_mean": 0.20072786509990692,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01588306687772274,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01588306687772274,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01368649173527956,
"signal/confidence_uniqueness_reward/group_std_mean": 0.019033579528331755,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013686491874977946,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013686491874977946,
"signal/format_reward/centered_abs_mean": 0.001312255859375,
"signal/format_reward/group_std_mean": 0.0035306816920638085,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0037599447183310985,
"signal/frontier_aurc_reward/group_std_mean": 0.006334328558295965,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.6999311598483474e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.6999311598483474e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.02307127006351948,
"signal/frontier_ece_reward/group_std_mean": 0.03083600252866745,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002307126997038722,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002307126997038722,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2677737444639206,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.345480477809906,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026777375489473343,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026777375489473343,
"signal/volume_coverage_0/centered_abs_mean": 4.1880419132667155e-10,
"signal/volume_coverage_0/group_std_mean": 5.327342095240439e-10,
"signal/volume_coverage_0/group_zero_std_frac": 1.0,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.1880420312279123e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 4.1880420312279123e-11,
"signal/volume_coverage_1/centered_abs_mean": 4.1880419132667155e-10,
"signal/volume_coverage_1/group_std_mean": 5.327342095240439e-10,
"signal/volume_coverage_1/group_zero_std_frac": 1.0,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.1880420312279123e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 4.1880420312279123e-11,
"signal/volume_coverage_10/centered_abs_mean": 1.0768645974934543e-09,
"signal/volume_coverage_10/group_std_mean": 1.392269344258068e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.9625,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.0768645974934543e-10,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.0768645974934543e-10,
"signal/volume_coverage_15/centered_abs_mean": 3.6891373511593883e-09,
"signal/volume_coverage_15/group_std_mean": 4.7468447395004885e-09,
"signal/volume_coverage_15/group_zero_std_frac": 0.840625,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.6891373567105034e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 3.6891373567105034e-10,
"signal/volume_coverage_20/centered_abs_mean": 9.620456786763043e-09,
"signal/volume_coverage_20/group_std_mean": 1.240889400122569e-08,
"signal/volume_coverage_20/group_zero_std_frac": 0.575,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 9.620456764558582e-10,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 9.620456764558582e-10,
"signal/volume_coverage_25/centered_abs_mean": 6.344143557157623e-08,
"signal/volume_coverage_25/group_std_mean": 8.170086047698533e-08,
"signal/volume_coverage_25/group_zero_std_frac": 0.190625,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 6.344143699266169e-09,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 6.344143699266169e-09,
"signal/volume_coverage_5/centered_abs_mean": 4.1880419132667155e-10,
"signal/volume_coverage_5/group_std_mean": 5.327342095240439e-10,
"signal/volume_coverage_5/group_zero_std_frac": 1.0,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.1880420312279123e-11,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 4.1880420312279123e-11,
"step": 145
},
{
"calibration/aurc": 0.3298085505781082,
"calibration/batch_distribution_entropy": 0.981574292660393,
"calibration/buffer_distribution_entropy": 0.9993247562967712,
"calibration/confidence_entropy": 0.49599020856948994,
"calibration/coverage@0%": 0.001171875,
"calibration/coverage@1%": 0.001171875,
"calibration/coverage@10%": 0.0046875,
"calibration/coverage@15%": 0.0046875,
"calibration/coverage@20%": 0.19724651418786693,
"calibration/coverage@25%": 0.37394966976516636,
"calibration/coverage@30%": 0.4459171660958904,
"calibration/coverage@5%": 0.001171875,
"calibration/ece": 0.14921269561400377,
"calibration/mean_confidence": 0.5074699378823706,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 550.8,
"completions/max_terminated_length": 550.8,
"completions/mean_length": 173.545703125,
"completions/mean_terminated_length": 173.6138458251953,
"completions/min_length": 24.2,
"completions/min_terminated_length": 70.2,
"epoch": 0.48,
"grad_norm": 0.001252944814041257,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 496117787.0,
"reward": 0.9171468496322632,
"reward_std": 0.10072523653507233,
"rewards/accuracy_reward": 0.52587890625,
"rewards/brier_reward": 0.7633208990097046,
"rewards/confidence_uniqueness_reward": 0.958120334148407,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0032815839163959025,
"rewards/frontier_ece_reward": 0.010945961438119411,
"rewards/frontier_entropy_batch_reward": -0.1874622732400894,
"rewards/volume_coverage_0": 5.831987115456627e-10,
"rewards/volume_coverage_1": 5.831987115456627e-10,
"rewards/volume_coverage_10": 1.4444269114122221e-09,
"rewards/volume_coverage_15": 3.3241117414206656e-09,
"rewards/volume_coverage_20": 5.639911000443476e-08,
"rewards/volume_coverage_25": 4.1795596104066137e-07,
"rewards/volume_coverage_5": 5.831987115456627e-10,
"signal/accuracy_reward/centered_abs_mean": 0.114166259765625,
"signal/accuracy_reward/group_std_mean": 0.15151501297950745,
"signal/accuracy_reward/group_zero_std_frac": 0.559375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0570831298828125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0570831298828125,
"signal/advantage_abs_mean": 0.07722624242305756,
"signal/advantage_pre_scale_abs_mean": 0.07722624242305756,
"signal/advantage_pre_scale_std": 0.12476578801870346,
"signal/advantage_std": 0.12476578801870346,
"signal/brier_reward/centered_abs_mean": 0.16023366451263427,
"signal/brier_reward/group_std_mean": 0.2034299075603485,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01602336745709181,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01602336745709181,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012775503285229205,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017437696829438208,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012775503797456621,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012775503797456621,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003498147753998637,
"signal/frontier_aurc_reward/group_std_mean": 0.00594499446451664,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.3726846342906356e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.3726846342906356e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.02257801927626133,
"signal/frontier_ece_reward/group_std_mean": 0.029622122645378113,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022578019183129073,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022578019183129073,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2693198204040527,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3461536645889282,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02693198397755623,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02693198397755623,
"signal/volume_coverage_0/centered_abs_mean": 1.38831864893163e-09,
"signal/volume_coverage_0/group_std_mean": 1.7580332856148572e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.975,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.388318682238321e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.388318682238321e-10,
"signal/volume_coverage_1/centered_abs_mean": 1.38831864893163e-09,
"signal/volume_coverage_1/group_std_mean": 1.7580332856148572e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.975,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.388318682238321e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.388318682238321e-10,
"signal/volume_coverage_10/centered_abs_mean": 3.6237949530004697e-09,
"signal/volume_coverage_10/group_std_mean": 4.640102768682653e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.85,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.6237949654904787e-10,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 3.6237949654904787e-10,
"signal/volume_coverage_15/centered_abs_mean": 8.293814324211724e-09,
"signal/volume_coverage_15/group_std_mean": 1.0603659017505151e-08,
"signal/volume_coverage_15/group_zero_std_frac": 0.671875,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 8.293814079962658e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 8.293814079962658e-10,
"signal/volume_coverage_20/centered_abs_mean": 9.955687936269442e-08,
"signal/volume_coverage_20/group_std_mean": 1.2609622110915097e-07,
"signal/volume_coverage_20/group_zero_std_frac": 0.421875,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 9.955688029528175e-09,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 9.955688029528175e-09,
"signal/volume_coverage_25/centered_abs_mean": 8.92666632523742e-07,
"signal/volume_coverage_25/group_std_mean": 1.1310221950111554e-06,
"signal/volume_coverage_25/group_zero_std_frac": 0.2625,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 8.926666517083959e-08,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 8.926666517083959e-08,
"signal/volume_coverage_5/centered_abs_mean": 1.38831864893163e-09,
"signal/volume_coverage_5/group_std_mean": 1.7580332856148572e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.975,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.388318682238321e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.388318682238321e-10,
"step": 150
},
{
"epoch": 0.48,
"eval_calibration/aurc": 0.467700578320845,
"eval_calibration/batch_distribution_entropy": 0.9465067652345415,
"eval_calibration/buffer_distribution_entropy": 0.9993365061314878,
"eval_calibration/confidence_entropy": 0.4856897573337645,
"eval_calibration/coverage@0%": 0.0234375,
"eval_calibration/coverage@1%": 0.0234375,
"eval_calibration/coverage@10%": 0.0234375,
"eval_calibration/coverage@15%": 0.09375,
"eval_calibration/coverage@20%": 0.2109375,
"eval_calibration/coverage@25%": 0.296875,
"eval_calibration/coverage@30%": 0.3125,
"eval_calibration/coverage@5%": 0.0234375,
"eval_calibration/ece": 0.24495353732649747,
"eval_calibration/mean_confidence": 0.49381096837038924,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 344.5,
"eval_completions/max_terminated_length": 344.5,
"eval_completions/mean_length": 177.96430587768555,
"eval_completions/mean_terminated_length": 177.96430587768555,
"eval_completions/min_length": 97.25,
"eval_completions/min_terminated_length": 97.25,
"eval_loss": 0.0,
"eval_num_tokens": 496117787.0,
"eval_reward": 0.7738081067800522,
"eval_reward_std": 0.25044039636850357,
"eval_rewards/accuracy_reward": 0.408203125,
"eval_rewards/brier_reward": 0.7795219719409943,
"eval_rewards/confidence_uniqueness_reward": 0.90625,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.00398009066702798,
"eval_rewards/frontier_ece_reward": 0.011790585471317172,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_rewards/volume_coverage_0": 1.088547529892736e-09,
"eval_rewards/volume_coverage_1": 1.088547529892736e-09,
"eval_rewards/volume_coverage_10": 3.2117084824978548e-09,
"eval_rewards/volume_coverage_15": 4.17547902503701e-09,
"eval_rewards/volume_coverage_20": 4.32811551043244e-08,
"eval_rewards/volume_coverage_25": 4.155160429064608e-07,
"eval_rewards/volume_coverage_5": 1.088547529892736e-09,
"eval_runtime": 18.8068,
"eval_samples_per_second": 26.586,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4661865234375,
"eval_signal/accuracy_reward/group_std_mean": 0.49009719491004944,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23309326171875,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23309326171875,
"eval_signal/advantage_abs_mean": 0.23672707751393318,
"eval_signal/advantage_pre_scale_abs_mean": 0.23672707751393318,
"eval_signal/advantage_pre_scale_std": 0.24762531742453575,
"eval_signal/advantage_std": 0.24762531742453575,
"eval_signal/brier_reward/centered_abs_mean": 0.19656691700220108,
"eval_signal/brier_reward/group_std_mean": 0.24666643142700195,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019656691700220108,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019656691700220108,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0359039306640625,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.042795900255441666,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035903931129723787,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035903931129723787,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005310411681421101,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.00943018146790564,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.638014838244999e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.638014838244999e-05,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.02532364521175623,
"eval_signal/frontier_ece_reward/group_std_mean": 0.035478693433105946,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025323645095340908,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025323645095340908,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/volume_coverage_0/centered_abs_mean": 2.4198223147475773e-09,
"eval_signal/volume_coverage_0/group_std_mean": 3.0092013880711477e-09,
"eval_signal/volume_coverage_0/group_zero_std_frac": 0.9375,
"eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.419822398014304e-10,
"eval_signal/volume_coverage_0/weight": 0.10000000149011612,
"eval_signal/volume_coverage_0/weighted_centered_abs_mean": 2.419822398014304e-10,
"eval_signal/volume_coverage_1/centered_abs_mean": 2.4198223147475773e-09,
"eval_signal/volume_coverage_1/group_std_mean": 3.0092013880711477e-09,
"eval_signal/volume_coverage_1/group_zero_std_frac": 0.9375,
"eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.419822398014304e-10,
"eval_signal/volume_coverage_1/weight": 0.10000000149011612,
"eval_signal/volume_coverage_1/weighted_centered_abs_mean": 2.419822398014304e-10,
"eval_signal/volume_coverage_10/centered_abs_mean": 7.609845725786712e-09,
"eval_signal/volume_coverage_10/group_std_mean": 9.562367619952994e-09,
"eval_signal/volume_coverage_10/group_zero_std_frac": 0.6875,
"eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 7.609846051914726e-10,
"eval_signal/volume_coverage_10/weight": 0.10000000149011612,
"eval_signal/volume_coverage_10/weighted_centered_abs_mean": 7.609846051914726e-10,
"eval_signal/volume_coverage_15/centered_abs_mean": 1.0576835796083373e-08,
"eval_signal/volume_coverage_15/group_std_mean": 1.3268543486155693e-08,
"eval_signal/volume_coverage_15/group_zero_std_frac": 0.625,
"eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.057683600425019e-09,
"eval_signal/volume_coverage_15/weight": 0.10000000149011612,
"eval_signal/volume_coverage_15/weighted_centered_abs_mean": 1.057683600425019e-09,
"eval_signal/volume_coverage_20/centered_abs_mean": 6.025526255726277e-08,
"eval_signal/volume_coverage_20/group_std_mean": 7.222980880072782e-08,
"eval_signal/volume_coverage_20/group_zero_std_frac": 0.5,
"eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 6.025526522179803e-09,
"eval_signal/volume_coverage_20/weight": 0.10000000149011612,
"eval_signal/volume_coverage_20/weighted_centered_abs_mean": 6.025526522179803e-09,
"eval_signal/volume_coverage_25/centered_abs_mean": 5.649726659839871e-07,
"eval_signal/volume_coverage_25/group_std_mean": 6.848031688377887e-07,
"eval_signal/volume_coverage_25/group_zero_std_frac": 0.4375,
"eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 5.6497268641209075e-08,
"eval_signal/volume_coverage_25/weight": 0.10000000149011612,
"eval_signal/volume_coverage_25/weighted_centered_abs_mean": 5.6497268641209075e-08,
"eval_signal/volume_coverage_5/centered_abs_mean": 2.4198223147475773e-09,
"eval_signal/volume_coverage_5/group_std_mean": 3.0092013880711477e-09,
"eval_signal/volume_coverage_5/group_zero_std_frac": 0.9375,
"eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.419822398014304e-10,
"eval_signal/volume_coverage_5/weight": 0.10000000149011612,
"eval_signal/volume_coverage_5/weighted_centered_abs_mean": 2.419822398014304e-10,
"eval_steps_per_second": 0.213,
"step": 150
},
{
"calibration/aurc": 0.4152612608407611,
"calibration/batch_distribution_entropy": 0.9864104103777949,
"calibration/buffer_distribution_entropy": 0.999336643054435,
"calibration/confidence_entropy": 0.49038454056488645,
"calibration/coverage@0%": 0.00546875,
"calibration/coverage@1%": 0.00546875,
"calibration/coverage@10%": 0.013671875,
"calibration/coverage@15%": 0.123046875,
"calibration/coverage@20%": 0.184765625,
"calibration/coverage@25%": 0.232421875,
"calibration/coverage@30%": 0.28046875,
"calibration/coverage@5%": 0.00546875,
"calibration/ece": 0.14502903763292493,
"calibration/mean_confidence": 0.5353343759131202,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 468.4,
"completions/max_terminated_length": 468.4,
"completions/mean_length": 177.9943359375,
"completions/mean_terminated_length": 178.02895812988282,
"completions/min_length": 49.4,
"completions/min_terminated_length": 76.0,
"epoch": 0.496,
"grad_norm": 0.0010539100039750338,
"learning_rate": 1e-06,
"loss": -0.0007,
"num_tokens": 513248289.0,
"reward": 0.9261873483657836,
"reward_std": 0.09546184092760086,
"rewards/accuracy_reward": 0.5423828125,
"rewards/brier_reward": 0.7670300483703614,
"rewards/confidence_uniqueness_reward": 0.958674430847168,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0033624432515352966,
"rewards/frontier_ece_reward": 0.010776512883603572,
"rewards/frontier_entropy_batch_reward": -0.18512516021728515,
"rewards/volume_coverage_0": 5.505965895019615e-10,
"rewards/volume_coverage_1": 5.505965895019615e-10,
"rewards/volume_coverage_10": 1.0167482748096867e-09,
"rewards/volume_coverage_15": 3.6165074934757515e-09,
"rewards/volume_coverage_20": 3.7402842067990604e-08,
"rewards/volume_coverage_25": 6.125171125859197e-07,
"rewards/volume_coverage_5": 5.505965895019615e-10,
"signal/accuracy_reward/centered_abs_mean": 0.09814453125,
"signal/accuracy_reward/group_std_mean": 0.13675257414579392,
"signal/accuracy_reward/group_zero_std_frac": 0.584375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049072265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049072265625,
"signal/advantage_abs_mean": 0.07115045487880707,
"signal/advantage_pre_scale_abs_mean": 0.07115045487880707,
"signal/advantage_pre_scale_std": 0.11864184141159058,
"signal/advantage_std": 0.11864184141159058,
"signal/brier_reward/centered_abs_mean": 0.1540187805891037,
"signal/brier_reward/group_std_mean": 0.19588074684143067,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015401878207921983,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015401878207921983,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012197751179337501,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015673059970140457,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012197751319035887,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012197751319035887,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0037505661603063345,
"signal/frontier_aurc_reward/group_std_mean": 0.006236158590763807,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.688207554863766e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.688207554863766e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.020873067528009416,
"signal/frontier_ece_reward/group_std_mean": 0.02787470892071724,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020873067667707803,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020873067667707803,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27026124596595763,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34854318499565123,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0270261250436306,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0270261250436306,
"signal/volume_coverage_0/centered_abs_mean": 7.434120190968896e-10,
"signal/volume_coverage_0/group_std_mean": 9.770461585623114e-10,
"signal/volume_coverage_0/group_zero_std_frac": 0.975,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.434120428626012e-11,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 7.434120428626012e-11,
"signal/volume_coverage_1/centered_abs_mean": 7.434120190968896e-10,
"signal/volume_coverage_1/group_std_mean": 9.770461585623114e-10,
"signal/volume_coverage_1/group_zero_std_frac": 0.975,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.434120428626012e-11,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 7.434120428626012e-11,
"signal/volume_coverage_10/centered_abs_mean": 2.5940776440108727e-09,
"signal/volume_coverage_10/group_std_mean": 3.379073677117761e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.903125,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.594077788513338e-10,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.594077788513338e-10,
"signal/volume_coverage_15/centered_abs_mean": 5.8909916944571705e-09,
"signal/volume_coverage_15/group_std_mean": 7.582481664769603e-09,
"signal/volume_coverage_15/group_zero_std_frac": 0.778125,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.890991722212746e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 5.890991722212746e-10,
"signal/volume_coverage_20/centered_abs_mean": 6.449160707688862e-08,
"signal/volume_coverage_20/group_std_mean": 8.150530064199302e-08,
"signal/volume_coverage_20/group_zero_std_frac": 0.46875,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 6.449160672161725e-09,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 6.449160672161725e-09,
"signal/volume_coverage_25/centered_abs_mean": 8.17778817463477e-07,
"signal/volume_coverage_25/group_std_mean": 1.035122755865814e-06,
"signal/volume_coverage_25/group_zero_std_frac": 0.259375,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 8.177788402008446e-08,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 8.177788402008446e-08,
"signal/volume_coverage_5/centered_abs_mean": 7.434120190968896e-10,
"signal/volume_coverage_5/group_std_mean": 9.770461585623114e-10,
"signal/volume_coverage_5/group_zero_std_frac": 0.975,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 7.434120428626012e-11,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 7.434120428626012e-11,
"step": 155
},
{
"calibration/aurc": 0.36293904971933433,
"calibration/batch_distribution_entropy": 0.9848749541280609,
"calibration/buffer_distribution_entropy": 0.9992323219323538,
"calibration/confidence_entropy": 0.518724464749172,
"calibration/coverage@0%": 0.012519110812133073,
"calibration/coverage@1%": 0.012519110812133073,
"calibration/coverage@10%": 0.1498570511252446,
"calibration/coverage@15%": 0.19367279231898238,
"calibration/coverage@20%": 0.26053235078277887,
"calibration/coverage@25%": 0.37265625,
"calibration/coverage@30%": 0.4265625,
"calibration/coverage@5%": 0.08414337695694715,
"calibration/ece": 0.15338829376624863,
"calibration/mean_confidence": 0.4998068643477344,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 478.2,
"completions/max_terminated_length": 478.2,
"completions/mean_length": 180.275390625,
"completions/mean_terminated_length": 180.32793579101562,
"completions/min_length": 69.0,
"completions/min_terminated_length": 87.2,
"epoch": 0.512,
"grad_norm": 0.0010503968223929405,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 530239973.0,
"reward": 0.9264042139053345,
"reward_std": 0.09568149298429489,
"rewards/accuracy_reward": 0.5359375,
"rewards/brier_reward": 0.78333660364151,
"rewards/confidence_uniqueness_reward": 0.9571387529373169,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0028535844292491676,
"rewards/frontier_ece_reward": 0.0107097577303648,
"rewards/frontier_entropy_batch_reward": -0.16500933170318605,
"rewards/volume_coverage_0": 1.9148062357920993e-09,
"rewards/volume_coverage_1": 1.9148062357920993e-09,
"rewards/volume_coverage_10": 2.77004001958403e-09,
"rewards/volume_coverage_15": 5.0034791865272155e-09,
"rewards/volume_coverage_20": 2.125222913207381e-08,
"rewards/volume_coverage_25": 4.308101402727971e-07,
"rewards/volume_coverage_5": 1.9148062357920993e-09,
"signal/accuracy_reward/centered_abs_mean": 0.10535888671875,
"signal/accuracy_reward/group_std_mean": 0.14029736816883087,
"signal/accuracy_reward/group_zero_std_frac": 0.59375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052679443359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.052679443359375,
"signal/advantage_abs_mean": 0.07290942072868348,
"signal/advantage_pre_scale_abs_mean": 0.07290942072868348,
"signal/advantage_pre_scale_std": 0.12146659642457962,
"signal/advantage_std": 0.12146659642457962,
"signal/brier_reward/centered_abs_mean": 0.14373116195201874,
"signal/brier_reward/group_std_mean": 0.18439054489135742,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014373116195201874,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014373116195201874,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011983883008360863,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015894040279090405,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011983883334323764,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011983883334323764,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814434766769,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032041643280535936,
"signal/frontier_aurc_reward/group_std_mean": 0.005314776767045259,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.005205410066992e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.005205410066992e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.018103313446044923,
"signal/frontier_ece_reward/group_std_mean": 0.024262651801109314,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001810331386514008,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001810331386514008,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.248811736702919,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32705228924751284,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02488117404282093,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02488117404282093,
"signal/volume_coverage_0/centered_abs_mean": 2.6785904161386044e-09,
"signal/volume_coverage_0/group_std_mean": 3.432433171024485e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.875,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.6785905160586766e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 2.6785905160586766e-10,
"signal/volume_coverage_1/centered_abs_mean": 2.6785904161386044e-09,
"signal/volume_coverage_1/group_std_mean": 3.432433171024485e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.875,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.6785905160586766e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 2.6785905160586766e-10,
"signal/volume_coverage_10/centered_abs_mean": 4.61633842263609e-09,
"signal/volume_coverage_10/group_std_mean": 5.933983215911099e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.71875,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.616338533658393e-10,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 4.616338533658393e-10,
"signal/volume_coverage_15/centered_abs_mean": 7.912232824480724e-09,
"signal/volume_coverage_15/group_std_mean": 1.0156219509838138e-08,
"signal/volume_coverage_15/group_zero_std_frac": 0.63125,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 7.912232757867344e-10,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 7.912232757867344e-10,
"signal/volume_coverage_20/centered_abs_mean": 4.049936386252284e-08,
"signal/volume_coverage_20/group_std_mean": 5.1828737213099883e-08,
"signal/volume_coverage_20/group_zero_std_frac": 0.215625,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 4.049936364047823e-09,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 4.049936364047823e-09,
"signal/volume_coverage_25/centered_abs_mean": 7.110435547019733e-07,
"signal/volume_coverage_25/group_std_mean": 9.2786052334759e-07,
"signal/volume_coverage_25/group_zero_std_frac": 0.009375,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 7.110435440438323e-08,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 7.110435440438323e-08,
"signal/volume_coverage_5/centered_abs_mean": 2.6785904161386044e-09,
"signal/volume_coverage_5/group_std_mean": 3.432433171024485e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.875,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.6785905160586766e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.6785905160586766e-10,
"step": 160
},
{
"calibration/aurc": 0.23877614445444467,
"calibration/batch_distribution_entropy": 0.9927720074762381,
"calibration/buffer_distribution_entropy": 0.9990736524579434,
"calibration/confidence_entropy": 0.5136030382710748,
"calibration/coverage@0%": 0.025390625,
"calibration/coverage@1%": 0.025390625,
"calibration/coverage@10%": 0.174609375,
"calibration/coverage@15%": 0.30789658757338556,
"calibration/coverage@20%": 0.43612555039138945,
"calibration/coverage@25%": 0.5604436766144814,
"calibration/coverage@30%": 0.6866812928082192,
"calibration/coverage@5%": 0.058984375,
"calibration/ece": 0.09971362930713529,
"calibration/mean_confidence": 0.521555718243601,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00068359375,
"completions/max_length": 693.8,
"completions/max_terminated_length": 693.8,
"completions/mean_length": 189.43447265625,
"completions/mean_terminated_length": 189.56402893066405,
"completions/min_length": 16.6,
"completions/min_terminated_length": 83.4,
"epoch": 0.528,
"grad_norm": 0.0013206545263528824,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 547209318.0,
"reward": 0.9277357578277587,
"reward_std": 0.09738266915082931,
"rewards/accuracy_reward": 0.5392578125,
"rewards/brier_reward": 0.7904103755950928,
"rewards/confidence_uniqueness_reward": 0.9558773875236511,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.002641553757712245,
"rewards/frontier_ece_reward": 0.01046077199280262,
"rewards/frontier_entropy_batch_reward": -0.17193247377872467,
"rewards/volume_coverage_0": 7.601244911281668e-10,
"rewards/volume_coverage_1": 7.601244911281668e-10,
"rewards/volume_coverage_10": 1.615653721920296e-09,
"rewards/volume_coverage_15": 8.446261243882925e-09,
"rewards/volume_coverage_20": 7.996970861623432e-08,
"rewards/volume_coverage_25": 7.100671410853465e-07,
"rewards/volume_coverage_5": 7.601244911281668e-10,
"signal/accuracy_reward/centered_abs_mean": 0.1120849609375,
"signal/accuracy_reward/group_std_mean": 0.14580150246620177,
"signal/accuracy_reward/group_zero_std_frac": 0.590625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05604248046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05604248046875,
"signal/advantage_abs_mean": 0.07535901814699172,
"signal/advantage_pre_scale_abs_mean": 0.07535901814699172,
"signal/advantage_pre_scale_std": 0.12386409789323807,
"signal/advantage_std": 0.12386409789323807,
"signal/brier_reward/centered_abs_mean": 0.14510386288166047,
"signal/brier_reward/group_std_mean": 0.1865969717502594,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014510386623442173,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014510386623442173,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012707922607660294,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0176456106826663,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012707923073321582,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012707923073321582,
"signal/format_reward/centered_abs_mean": 0.001312255859375,
"signal/format_reward/group_std_mean": 0.0035306816454976795,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00314983818680048,
"signal/frontier_aurc_reward/group_std_mean": 0.005508489906787872,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.937297806260176e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.937297806260176e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.01709262691438198,
"signal/frontier_ece_reward/group_std_mean": 0.022918767482042312,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017092627473175525,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017092627473175525,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2518620818853378,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3292876541614532,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02518620789051056,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02518620789051056,
"signal/volume_coverage_0/centered_abs_mean": 1.399367510757088e-09,
"signal/volume_coverage_0/group_std_mean": 1.810499622445505e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.978125,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.3993674587153837e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.3993674587153837e-10,
"signal/volume_coverage_1/centered_abs_mean": 1.399367510757088e-09,
"signal/volume_coverage_1/group_std_mean": 1.810499622445505e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.978125,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.3993674587153837e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.3993674587153837e-10,
"signal/volume_coverage_10/centered_abs_mean": 3.065493392462315e-09,
"signal/volume_coverage_10/group_std_mean": 3.9665126294607946e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.86875,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.0654934701779266e-10,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 3.0654934701779266e-10,
"signal/volume_coverage_15/centered_abs_mean": 1.5146079235250908e-08,
"signal/volume_coverage_15/group_std_mean": 1.9439614362681823e-08,
"signal/volume_coverage_15/group_zero_std_frac": 0.6,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.5146079890282493e-09,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.5146079890282493e-09,
"signal/volume_coverage_20/centered_abs_mean": 1.3799506817235852e-07,
"signal/volume_coverage_20/group_std_mean": 1.7752451881847263e-07,
"signal/volume_coverage_20/group_zero_std_frac": 0.115625,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.3799507359024688e-08,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.3799507359024688e-08,
"signal/volume_coverage_25/centered_abs_mean": 1.6232085556566743e-06,
"signal/volume_coverage_25/group_std_mean": 2.090263268428316e-06,
"signal/volume_coverage_25/group_zero_std_frac": 0.003125,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.6232085329193068e-07,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.6232085329193068e-07,
"signal/volume_coverage_5/centered_abs_mean": 1.399367510757088e-09,
"signal/volume_coverage_5/group_std_mean": 1.810499622445505e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.978125,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.3993674587153837e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.3993674587153837e-10,
"step": 165
},
{
"calibration/aurc": 0.284975488615337,
"calibration/batch_distribution_entropy": 0.981906659826009,
"calibration/buffer_distribution_entropy": 0.9989556933707497,
"calibration/confidence_entropy": 0.4936425820133298,
"calibration/coverage@0%": 0.000390625,
"calibration/coverage@1%": 0.000390625,
"calibration/coverage@10%": 0.000390625,
"calibration/coverage@15%": 0.00546875,
"calibration/coverage@20%": 0.27891083659491195,
"calibration/coverage@25%": 0.5182225415851273,
"calibration/coverage@30%": 0.625707864481409,
"calibration/coverage@5%": 0.000390625,
"calibration/ece": 0.10303824341155698,
"calibration/mean_confidence": 0.5469045699978488,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 664.6,
"completions/max_terminated_length": 664.6,
"completions/mean_length": 193.5,
"completions/mean_terminated_length": 193.5752746582031,
"completions/min_length": 33.4,
"completions/min_terminated_length": 78.6,
"epoch": 0.544,
"grad_norm": 0.0013264644658192992,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 564354342.0,
"reward": 0.9393848657608033,
"reward_std": 0.10854032784700393,
"rewards/accuracy_reward": 0.57236328125,
"rewards/brier_reward": 0.7735715985298157,
"rewards/confidence_uniqueness_reward": 0.9557937264442444,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.003230233443900943,
"rewards/frontier_ece_reward": 0.008333365246653558,
"rewards/frontier_entropy_batch_reward": -0.20332232415676116,
"rewards/volume_coverage_0": 3.671275700112275e-10,
"rewards/volume_coverage_1": 3.671275700112275e-10,
"rewards/volume_coverage_10": 1.0478441445083141e-08,
"rewards/volume_coverage_15": 2.614181688898043e-08,
"rewards/volume_coverage_20": 1.0166868236183291e-07,
"rewards/volume_coverage_25": 1.2672042976191733e-05,
"rewards/volume_coverage_5": 3.671275700112275e-10,
"signal/accuracy_reward/centered_abs_mean": 0.127337646484375,
"signal/accuracy_reward/group_std_mean": 0.16760770380496978,
"signal/accuracy_reward/group_zero_std_frac": 0.525,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0636688232421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0636688232421875,
"signal/advantage_abs_mean": 0.08374463021755219,
"signal/advantage_pre_scale_abs_mean": 0.08374463021755219,
"signal/advantage_pre_scale_std": 0.13230954706668854,
"signal/advantage_std": 0.13230954706668854,
"signal/brier_reward/centered_abs_mean": 0.1559443324804306,
"signal/brier_reward/group_std_mean": 0.19659957587718963,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015594434179365634,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015594434179365634,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01252935416996479,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016918303444981575,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001252935454249382,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001252935454249382,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0038732617627829312,
"signal/frontier_aurc_reward/group_std_mean": 0.006673902738839388,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.841577319893986e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.841577319893986e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.01696496121585369,
"signal/frontier_ece_reward/group_std_mean": 0.022660358622670174,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016964962240308523,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016964962240308523,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28068632185459136,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3599223792552948,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028068631887435913,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028068631887435913,
"signal/volume_coverage_0/centered_abs_mean": 1.9625665315103903e-09,
"signal/volume_coverage_0/group_std_mean": 2.518391584160895e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.953125,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.9625665537148507e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.9625665537148507e-10,
"signal/volume_coverage_1/centered_abs_mean": 1.9625665315103903e-09,
"signal/volume_coverage_1/group_std_mean": 2.518391584160895e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.953125,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.9625665537148507e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.9625665537148507e-10,
"signal/volume_coverage_10/centered_abs_mean": 2.6223874804998103e-08,
"signal/volume_coverage_10/group_std_mean": 3.3586331760204755e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.6875,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.6223874849407027e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.6223874849407027e-09,
"signal/volume_coverage_15/centered_abs_mean": 6.304688220382104e-08,
"signal/volume_coverage_15/group_std_mean": 8.066067636036678e-08,
"signal/volume_coverage_15/group_zero_std_frac": 0.5625,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 6.304688282554594e-09,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 6.304688282554594e-09,
"signal/volume_coverage_20/centered_abs_mean": 2.755489987293913e-07,
"signal/volume_coverage_20/group_std_mean": 3.5120059465043597e-07,
"signal/volume_coverage_20/group_zero_std_frac": 0.1125,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.7554899517667764e-08,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 2.7554899517667764e-08,
"signal/volume_coverage_25/centered_abs_mean": 2.7111944245916674e-05,
"signal/volume_coverage_25/group_std_mean": 3.509574339659594e-05,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.711194588300714e-06,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 2.711194588300714e-06,
"signal/volume_coverage_5/centered_abs_mean": 1.9625665315103903e-09,
"signal/volume_coverage_5/group_std_mean": 2.518391584160895e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.953125,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.9625665537148507e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.9625665537148507e-10,
"step": 170
},
{
"calibration/aurc": 0.28625889515309394,
"calibration/batch_distribution_entropy": 0.9849145550016477,
"calibration/buffer_distribution_entropy": 0.9989275472363378,
"calibration/confidence_entropy": 0.5021353698713945,
"calibration/coverage@0%": 0.00625,
"calibration/coverage@1%": 0.00625,
"calibration/coverage@10%": 0.22385946673189822,
"calibration/coverage@15%": 0.2813050391389432,
"calibration/coverage@20%": 0.32352158757338556,
"calibration/coverage@25%": 0.41536127079256363,
"calibration/coverage@30%": 0.5037388392857143,
"calibration/coverage@5%": 0.129296875,
"calibration/ece": 0.14045019719409385,
"calibration/mean_confidence": 0.47354560342460267,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 542.8,
"completions/max_terminated_length": 542.8,
"completions/mean_length": 189.28076171875,
"completions/mean_terminated_length": 189.39351501464844,
"completions/min_length": 31.2,
"completions/min_terminated_length": 81.4,
"epoch": 0.56,
"grad_norm": 0.00103695597499609,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 581113985.0,
"reward": 0.9196316242218018,
"reward_std": 0.08974321335554122,
"rewards/accuracy_reward": 0.5251953125,
"rewards/brier_reward": 0.7853912115097046,
"rewards/confidence_uniqueness_reward": 0.9560101389884949,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.002737493207678199,
"rewards/frontier_ece_reward": 0.00851745791733265,
"rewards/frontier_entropy_batch_reward": -0.1759500414133072,
"rewards/volume_coverage_0": 2.1538784755748e-09,
"rewards/volume_coverage_1": 2.1538784755748e-09,
"rewards/volume_coverage_10": 1.186941149811105e-08,
"rewards/volume_coverage_15": 1.8020547543073918e-08,
"rewards/volume_coverage_20": 7.627865983295123e-08,
"rewards/volume_coverage_25": 0.00013089905551169067,
"rewards/volume_coverage_5": 5.395910451788666e-09,
"signal/accuracy_reward/centered_abs_mean": 0.09229736328125,
"signal/accuracy_reward/group_std_mean": 0.12655276507139207,
"signal/accuracy_reward/group_zero_std_frac": 0.615625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.046148681640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.046148681640625,
"signal/advantage_abs_mean": 0.06730285361409187,
"signal/advantage_pre_scale_abs_mean": 0.06730285361409187,
"signal/advantage_pre_scale_std": 0.11310036927461624,
"signal/advantage_std": 0.11310036927461624,
"signal/brier_reward/centered_abs_mean": 0.14888992309570312,
"signal/brier_reward/group_std_mean": 0.19088106453418732,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01488899253308773,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01488899253308773,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012639607675373555,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017911006696522236,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012639608001336455,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012639608001336455,
"signal/format_reward/centered_abs_mean": 0.001324462890625,
"signal/format_reward/group_std_mean": 0.003866990189999342,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029790752567350866,
"signal/frontier_aurc_reward/group_std_mean": 0.00507925059646368,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.723844129126519e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.723844129126519e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.014743305556476117,
"signal/frontier_ece_reward/group_std_mean": 0.02005004920065403,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014743305975571275,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014743305975571275,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25941252410411836,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3358436286449432,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02594125233590603,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02594125233590603,
"signal/volume_coverage_0/centered_abs_mean": 3.6427160843288677e-09,
"signal/volume_coverage_0/group_std_mean": 4.646901397009628e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.89375,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 3.6427163507823936e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 3.6427163507823936e-10,
"signal/volume_coverage_1/centered_abs_mean": 3.6427160843288677e-09,
"signal/volume_coverage_1/group_std_mean": 4.646901397009628e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.89375,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 3.6427163507823936e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 3.6427163507823936e-10,
"signal/volume_coverage_10/centered_abs_mean": 1.4844116957846154e-08,
"signal/volume_coverage_10/group_std_mean": 1.899510362335377e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.634375,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.4844117379730904e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.4844117379730904e-09,
"signal/volume_coverage_15/centered_abs_mean": 2.3456014375256017e-08,
"signal/volume_coverage_15/group_std_mean": 2.994891801222366e-08,
"signal/volume_coverage_15/group_zero_std_frac": 0.403125,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.345601579634149e-09,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.345601579634149e-09,
"signal/volume_coverage_20/centered_abs_mean": 1.5019926422610296e-07,
"signal/volume_coverage_20/group_std_mean": 1.9201455927486677e-07,
"signal/volume_coverage_20/group_zero_std_frac": 0.034375,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.501992628050175e-08,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.501992628050175e-08,
"signal/volume_coverage_25/centered_abs_mean": 0.000258388533256948,
"signal/volume_coverage_25/group_std_mean": 0.00032922495738603176,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.5838854526227806e-05,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 2.5838854526227806e-05,
"signal/volume_coverage_5/centered_abs_mean": 7.795711720604004e-09,
"signal/volume_coverage_5/group_std_mean": 9.911290277386798e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.778125,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 7.795711631786162e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 7.795711631786162e-10,
"step": 175
},
{
"calibration/aurc": 0.33577627934999876,
"calibration/batch_distribution_entropy": 0.9838566498258011,
"calibration/buffer_distribution_entropy": 0.99915950902613,
"calibration/confidence_entropy": 0.5019535715581608,
"calibration/coverage@0%": 0.005880821078431372,
"calibration/coverage@1%": 0.005880821078431372,
"calibration/coverage@10%": 0.09410232843137255,
"calibration/coverage@15%": 0.1340441176470588,
"calibration/coverage@20%": 0.23295802696078433,
"calibration/coverage@25%": 0.2845726102941176,
"calibration/coverage@30%": 0.4221292892156862,
"calibration/coverage@5%": 0.04509650735294118,
"calibration/ece": 0.10756879220322679,
"calibration/mean_confidence": 0.4687018438753654,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00078125,
"completions/max_length": 600.8,
"completions/max_terminated_length": 600.8,
"completions/mean_length": 183.75869140625,
"completions/mean_terminated_length": 183.90218505859374,
"completions/min_length": 16.4,
"completions/min_terminated_length": 83.2,
"epoch": 0.576,
"grad_norm": 0.0010323330061510205,
"learning_rate": 1e-06,
"loss": -0.0005,
"num_tokens": 598182298.0,
"reward": 0.912653386592865,
"reward_std": 0.0918677106499672,
"rewards/accuracy_reward": 0.51796875,
"rewards/brier_reward": 0.7720121502876282,
"rewards/confidence_uniqueness_reward": 0.9561181187629699,
"rewards/format_reward": 0.99912109375,
"rewards/frontier_aurc_reward": -0.00320719201117754,
"rewards/frontier_ece_reward": 0.006886793207377196,
"rewards/frontier_entropy_batch_reward": -0.19370051622390747,
"rewards/volume_coverage_0": 1.4855961287529862e-09,
"rewards/volume_coverage_1": 1.4855961287529862e-09,
"rewards/volume_coverage_10": 7.633383480865064e-09,
"rewards/volume_coverage_15": 8.681704422031088e-09,
"rewards/volume_coverage_20": 1.326518031419255e-07,
"rewards/volume_coverage_25": 0.00016885874574654736,
"rewards/volume_coverage_5": 1.3487472694251324e-09,
"signal/accuracy_reward/centered_abs_mean": 0.09427490234375,
"signal/accuracy_reward/group_std_mean": 0.13005066514015198,
"signal/accuracy_reward/group_zero_std_frac": 0.60625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047137451171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.047137451171875,
"signal/advantage_abs_mean": 0.06886442601680756,
"signal/advantage_pre_scale_abs_mean": 0.06886442601680756,
"signal/advantage_pre_scale_std": 0.11540580689907073,
"signal/advantage_std": 0.11540580689907073,
"signal/brier_reward/centered_abs_mean": 0.15298969745635987,
"signal/brier_reward/group_std_mean": 0.196148481965065,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015298970974981785,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015298970974981785,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014024058356881142,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01963724195957184,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014024058356881142,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014024058356881142,
"signal/format_reward/centered_abs_mean": 0.001678466796875,
"signal/format_reward/group_std_mean": 0.004299227613955736,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008392333984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008392333984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0034444320946931837,
"signal/frontier_aurc_reward/group_std_mean": 0.006053841486573219,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.305540132918395e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.305540132918395e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.013865727372467518,
"signal/frontier_ece_reward/group_std_mean": 0.018852605298161507,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001386572769843042,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001386572769843042,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27207915782928466,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.349167013168335,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02720791697502136,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02720791697502136,
"signal/volume_coverage_0/centered_abs_mean": 2.913903074386326e-09,
"signal/volume_coverage_0/group_std_mean": 3.67858619121364e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.8625,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.9139030077729444e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 2.9139030077729444e-10,
"signal/volume_coverage_1/centered_abs_mean": 2.913903074386326e-09,
"signal/volume_coverage_1/group_std_mean": 3.67858619121364e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.8625,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.9139030077729444e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 2.9139030077729444e-10,
"signal/volume_coverage_10/centered_abs_mean": 1.4660136571365001e-08,
"signal/volume_coverage_10/group_std_mean": 1.8583565264407297e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.6625,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.4660137548361263e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.4660137548361263e-09,
"signal/volume_coverage_15/centered_abs_mean": 2.8842995369871006e-08,
"signal/volume_coverage_15/group_std_mean": 3.6737129605057814e-08,
"signal/volume_coverage_15/group_zero_std_frac": 0.51875,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.88429959915959e-09,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.88429959915959e-09,
"signal/volume_coverage_20/centered_abs_mean": 2.416134393001812e-07,
"signal/volume_coverage_20/group_std_mean": 3.0170963327691425e-07,
"signal/volume_coverage_20/group_zero_std_frac": 0.0875,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.4161344569506583e-08,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 2.4161344569506583e-08,
"signal/volume_coverage_25/centered_abs_mean": 0.00029829425329808146,
"signal/volume_coverage_25/group_std_mean": 0.00037817141273990276,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.9829425693606025e-05,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 2.9829425693606025e-05,
"signal/volume_coverage_5/centered_abs_mean": 4.639043682530541e-09,
"signal/volume_coverage_5/group_std_mean": 5.895791987953203e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.825,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.6390438157573046e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 4.6390438157573046e-10,
"step": 180
},
{
"calibration/aurc": 0.32942794396551883,
"calibration/batch_distribution_entropy": 0.9829910047287995,
"calibration/buffer_distribution_entropy": 0.9992248661659392,
"calibration/confidence_entropy": 0.48282500606721623,
"calibration/coverage@0%": 0.013679534313725488,
"calibration/coverage@1%": 0.013679534313725488,
"calibration/coverage@10%": 0.0739813112745098,
"calibration/coverage@15%": 0.1698452818627451,
"calibration/coverage@20%": 0.35943933823529417,
"calibration/coverage@25%": 0.4762928921568627,
"calibration/coverage@30%": 0.5509574142156863,
"calibration/coverage@5%": 0.014070159313725489,
"calibration/ece": 0.1396739366660511,
"calibration/mean_confidence": 0.501586015132056,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 570.0,
"completions/max_terminated_length": 570.0,
"completions/mean_length": 176.1791015625,
"completions/mean_terminated_length": 176.28189086914062,
"completions/min_length": 31.8,
"completions/min_terminated_length": 75.2,
"epoch": 0.592,
"grad_norm": 0.0011522769927978516,
"learning_rate": 1e-06,
"loss": -0.0003,
"num_tokens": 615154084.0,
"reward": 0.9166985511779785,
"reward_std": 0.09232619255781174,
"rewards/accuracy_reward": 0.523046875,
"rewards/brier_reward": 0.7763656497001648,
"rewards/confidence_uniqueness_reward": 0.9593760132789612,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.003226546896621585,
"rewards/frontier_ece_reward": 0.0073450343683362,
"rewards/frontier_entropy_batch_reward": -0.1882669061422348,
"rewards/volume_coverage_0": 1.7111413064263558e-09,
"rewards/volume_coverage_1": 1.7111413064263558e-09,
"rewards/volume_coverage_10": 7.761732589983694e-09,
"rewards/volume_coverage_15": 4.0289498137724425e-08,
"rewards/volume_coverage_20": 4.315438417279438e-07,
"rewards/volume_coverage_25": 0.0002638868114445359,
"rewards/volume_coverage_5": 3.0811049600565355e-09,
"signal/accuracy_reward/centered_abs_mean": 0.099951171875,
"signal/accuracy_reward/group_std_mean": 0.13282042741775513,
"signal/accuracy_reward/group_zero_std_frac": 0.615625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0499755859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0499755859375,
"signal/advantage_abs_mean": 0.07076025158166885,
"signal/advantage_pre_scale_abs_mean": 0.07076025158166885,
"signal/advantage_pre_scale_std": 0.11678868681192398,
"signal/advantage_std": 0.11678868681192398,
"signal/brier_reward/centered_abs_mean": 0.15273409485816955,
"signal/brier_reward/group_std_mean": 0.19397153854370117,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015273409895598888,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015273409895598888,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012779767252504825,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01754718404263258,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012779767625033856,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012779767625033856,
"signal/format_reward/centered_abs_mean": 0.001123046875,
"signal/format_reward/group_std_mean": 0.0029782545287162067,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005615234375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0005615234375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036609724164009096,
"signal/frontier_aurc_reward/group_std_mean": 0.006075662653893232,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.576215505949222e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.576215505949222e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.013755329139530658,
"signal/frontier_ece_reward/group_std_mean": 0.018514570221304892,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013755329186096788,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013755329186096788,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26485961079597475,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3429957151412964,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02648596204817295,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02648596204817295,
"signal/volume_coverage_0/centered_abs_mean": 4.143559362290716e-09,
"signal/volume_coverage_0/group_std_mean": 5.253447454833804e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.759375,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.143559273472874e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 4.143559273472874e-10,
"signal/volume_coverage_1/centered_abs_mean": 4.143559362290716e-09,
"signal/volume_coverage_1/group_std_mean": 5.253447454833804e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.759375,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.143559273472874e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 4.143559273472874e-10,
"signal/volume_coverage_10/centered_abs_mean": 1.8865098816434056e-08,
"signal/volume_coverage_10/group_std_mean": 2.3865029952219174e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.496875,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.8865098816434054e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.8865098816434054e-09,
"signal/volume_coverage_15/centered_abs_mean": 5.947744803336264e-08,
"signal/volume_coverage_15/group_std_mean": 7.626531512983093e-08,
"signal/volume_coverage_15/group_zero_std_frac": 0.43125,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.947744918799458e-09,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 5.947744918799458e-09,
"signal/volume_coverage_20/centered_abs_mean": 6.130203587417781e-07,
"signal/volume_coverage_20/group_std_mean": 7.699923173731804e-07,
"signal/volume_coverage_20/group_zero_std_frac": 0.109375,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 6.130203438203807e-08,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 6.130203438203807e-08,
"signal/volume_coverage_25/centered_abs_mean": 0.0005171356489881874,
"signal/volume_coverage_25/group_std_mean": 0.0006737531046383082,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 5.171356460778043e-05,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 5.171356460778043e-05,
"signal/volume_coverage_5/centered_abs_mean": 7.04651075267293e-09,
"signal/volume_coverage_5/group_std_mean": 8.822214336845491e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.721875,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 7.046511152353219e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 7.046511152353219e-10,
"step": 185
},
{
"calibration/aurc": 0.25322455275355066,
"calibration/batch_distribution_entropy": 0.9823324911562663,
"calibration/buffer_distribution_entropy": 0.9992289443404798,
"calibration/confidence_entropy": 0.4846762651810278,
"calibration/coverage@0%": 0.011338071615632554,
"calibration/coverage@1%": 0.011338071615632554,
"calibration/coverage@10%": 0.0953729980862208,
"calibration/coverage@15%": 0.32919649452246647,
"calibration/coverage@20%": 0.48641912544127236,
"calibration/coverage@25%": 0.5775105431727485,
"calibration/coverage@30%": 0.684254498460343,
"calibration/coverage@5%": 0.039463071615632554,
"calibration/ece": 0.09481818543349048,
"calibration/mean_confidence": 0.5019350730358498,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 602.4,
"completions/max_terminated_length": 602.4,
"completions/mean_length": 175.944140625,
"completions/mean_terminated_length": 176.04876098632812,
"completions/min_length": 45.6,
"completions/min_terminated_length": 75.4,
"epoch": 0.608,
"grad_norm": 0.0010520720388740301,
"learning_rate": 1e-06,
"loss": -0.0004,
"num_tokens": 631955240.0,
"reward": 0.921320378780365,
"reward_std": 0.09033734798431396,
"rewards/accuracy_reward": 0.5259765625,
"rewards/brier_reward": 0.7994609236717224,
"rewards/confidence_uniqueness_reward": 0.9615864753723145,
"rewards/format_reward": 0.99921875,
"rewards/frontier_aurc_reward": -0.002751393895596266,
"rewards/frontier_ece_reward": 0.008317224588245154,
"rewards/frontier_entropy_batch_reward": -0.18277242481708528,
"rewards/volume_coverage_0": 1.578650821176808e-09,
"rewards/volume_coverage_1": 1.578650821176808e-09,
"rewards/volume_coverage_10": 1.3228030226741794e-08,
"rewards/volume_coverage_15": 3.540733644058491e-08,
"rewards/volume_coverage_20": 8.138225325637905e-07,
"rewards/volume_coverage_25": 0.0009779959451407193,
"rewards/volume_coverage_5": 2.3707905971726007e-09,
"signal/accuracy_reward/centered_abs_mean": 0.09971923828125,
"signal/accuracy_reward/group_std_mean": 0.12960658967494965,
"signal/accuracy_reward/group_zero_std_frac": 0.634375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049859619140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049859619140625,
"signal/advantage_abs_mean": 0.06983233988285065,
"signal/advantage_pre_scale_abs_mean": 0.06983233988285065,
"signal/advantage_pre_scale_std": 0.11588151454925537,
"signal/advantage_std": 0.11588151454925537,
"signal/brier_reward/centered_abs_mean": 0.1423773616552353,
"signal/brier_reward/group_std_mean": 0.18201495707035065,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014237736538052558,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014237736538052558,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012686197459697724,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017919574119150638,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012686197878792882,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012686197878792882,
"signal/format_reward/centered_abs_mean": 0.0014892578125,
"signal/format_reward/group_std_mean": 0.003746800497174263,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00074462890625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00074462890625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0031977016944438217,
"signal/frontier_aurc_reward/group_std_mean": 0.005458212643861771,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.99712698708754e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.99712698708754e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.012652770802378654,
"signal/frontier_ece_reward/group_std_mean": 0.016831176169216634,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012652770616114139,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012652770616114139,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26298512816429137,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33757553100585935,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026298512518405915,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026298512518405915,
"signal/volume_coverage_0/centered_abs_mean": 2.830159973044033e-09,
"signal/volume_coverage_0/group_std_mean": 3.566805295562858e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.8875,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.8301599952484935e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 2.8301599952484935e-10,
"signal/volume_coverage_1/centered_abs_mean": 2.830159973044033e-09,
"signal/volume_coverage_1/group_std_mean": 3.566805295562858e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.8875,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.8301599952484935e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 2.8301599952484935e-10,
"signal/volume_coverage_10/centered_abs_mean": 2.096147753150035e-08,
"signal/volume_coverage_10/group_std_mean": 2.6520628182424844e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.4375,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.0961477664727114e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.0961477664727114e-09,
"signal/volume_coverage_15/centered_abs_mean": 6.579716114174516e-08,
"signal/volume_coverage_15/group_std_mean": 8.336484569326785e-08,
"signal/volume_coverage_15/group_zero_std_frac": 0.259375,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 6.579716593790863e-09,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 6.579716593790863e-09,
"signal/volume_coverage_20/centered_abs_mean": 1.493381773798319e-06,
"signal/volume_coverage_20/group_std_mean": 1.8822723177436274e-06,
"signal/volume_coverage_20/group_zero_std_frac": 0.003125,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.4933818164308833e-07,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.4933818164308833e-07,
"signal/volume_coverage_25/centered_abs_mean": 0.0014129023300483822,
"signal/volume_coverage_25/group_std_mean": 0.0017993575427681207,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00014129024057183415,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.00014129024057183415,
"signal/volume_coverage_5/centered_abs_mean": 3.845295371007751e-09,
"signal/volume_coverage_5/group_std_mean": 4.820825294515885e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.871875,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 3.8452955819501257e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 3.8452955819501257e-10,
"step": 190
},
{
"calibration/aurc": 0.28957636994436,
"calibration/batch_distribution_entropy": 0.9859862395418935,
"calibration/buffer_distribution_entropy": 0.9992691344631108,
"calibration/confidence_entropy": 0.5273816689513522,
"calibration/coverage@0%": 0.0019546538649706457,
"calibration/coverage@1%": 0.0019546538649706457,
"calibration/coverage@10%": 0.0285691352739726,
"calibration/coverage@15%": 0.1536226455479452,
"calibration/coverage@20%": 0.27676583904109586,
"calibration/coverage@25%": 0.44141465875733854,
"calibration/coverage@30%": 0.5814135885518591,
"calibration/coverage@5%": 0.0019546538649706457,
"calibration/ece": 0.09294006812124718,
"calibration/mean_confidence": 0.512454016404094,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 534.0,
"completions/max_terminated_length": 534.0,
"completions/mean_length": 176.61015625,
"completions/mean_terminated_length": 176.6790740966797,
"completions/min_length": 12.8,
"completions/min_terminated_length": 68.2,
"epoch": 0.624,
"grad_norm": 0.001361057278700173,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 649107632.0,
"reward": 0.9198349595069886,
"reward_std": 0.09446196258068085,
"rewards/accuracy_reward": 0.52666015625,
"rewards/brier_reward": 0.7886810898780823,
"rewards/confidence_uniqueness_reward": 0.9602108001708984,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.003087216429412365,
"rewards/frontier_ece_reward": 0.005673312395811081,
"rewards/frontier_entropy_batch_reward": -0.18763623535633087,
"rewards/volume_coverage_0": 1.0043352427491215e-09,
"rewards/volume_coverage_1": 1.0043352427491215e-09,
"rewards/volume_coverage_10": 1.4544933479854194e-08,
"rewards/volume_coverage_15": 1.0033256554464743e-07,
"rewards/volume_coverage_20": 7.805879029376684e-07,
"rewards/volume_coverage_25": 0.000946238508913666,
"rewards/volume_coverage_5": 1.0043352427491215e-09,
"signal/accuracy_reward/centered_abs_mean": 0.101385498046875,
"signal/accuracy_reward/group_std_mean": 0.1348143756389618,
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0506927490234375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0506927490234375,
"signal/advantage_abs_mean": 0.07220707982778549,
"signal/advantage_pre_scale_abs_mean": 0.07220707982778549,
"signal/advantage_pre_scale_std": 0.11911879479885101,
"signal/advantage_std": 0.11911879479885101,
"signal/brier_reward/centered_abs_mean": 0.14108724892139435,
"signal/brier_reward/group_std_mean": 0.18158984780311585,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01410872545093298,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01410872545093298,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012880866974592209,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01754343006759882,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012880866648629307,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012880866648629307,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0033061566296964883,
"signal/frontier_aurc_reward/group_std_mean": 0.005856604594737291,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.132695976295508e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.132695976295508e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.010227170027792453,
"signal/frontier_ece_reward/group_std_mean": 0.013960633054375648,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010227170423604548,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010227170423604548,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2718630850315094,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35125975012779237,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027186309918761253,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027186309918761253,
"signal/volume_coverage_0/centered_abs_mean": 2.233685147956521e-09,
"signal/volume_coverage_0/group_std_mean": 2.8576097932386802e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.95625,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.233685250652151e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 2.233685250652151e-10,
"signal/volume_coverage_1/centered_abs_mean": 2.233685147956521e-09,
"signal/volume_coverage_1/group_std_mean": 2.8576097932386802e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.95625,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.233685250652151e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 2.233685250652151e-10,
"signal/volume_coverage_10/centered_abs_mean": 2.6452925894204782e-08,
"signal/volume_coverage_10/group_std_mean": 3.414551414948619e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.484375,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.6452926960018884e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.6452926960018884e-09,
"signal/volume_coverage_15/centered_abs_mean": 1.6930871389320145e-07,
"signal/volume_coverage_15/group_std_mean": 2.1859083005892898e-07,
"signal/volume_coverage_15/group_zero_std_frac": 0.296875,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.6930871726827944e-08,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.6930871726827944e-08,
"signal/volume_coverage_20/centered_abs_mean": 1.2653310534460616e-06,
"signal/volume_coverage_20/group_std_mean": 1.6244108905993927e-06,
"signal/volume_coverage_20/group_zero_std_frac": 0.009375,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.2653311110000232e-07,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.2653311110000232e-07,
"signal/volume_coverage_25/centered_abs_mean": 0.0016905165975913405,
"signal/volume_coverage_25/group_std_mean": 0.0022139872424304487,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00016905165684875101,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.00016905165684875101,
"signal/volume_coverage_5/centered_abs_mean": 2.233685147956521e-09,
"signal/volume_coverage_5/group_std_mean": 2.8576097932386802e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.95625,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.233685250652151e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.233685250652151e-10,
"step": 195
},
{
"calibration/aurc": 0.26891983390554686,
"calibration/batch_distribution_entropy": 0.9892751505258353,
"calibration/buffer_distribution_entropy": 0.9993554405698306,
"calibration/confidence_entropy": 0.525787655235901,
"calibration/coverage@0%": 0.009800857843137254,
"calibration/coverage@1%": 0.009800857843137254,
"calibration/coverage@10%": 0.2555836397058823,
"calibration/coverage@15%": 0.3670235906862745,
"calibration/coverage@20%": 0.4393504901960784,
"calibration/coverage@25%": 0.5050428861613139,
"calibration/coverage@30%": 0.5847763270547945,
"calibration/coverage@5%": 0.18278492647058825,
"calibration/ece": 0.17315416015321752,
"calibration/mean_confidence": 0.5182405579510768,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00126953125,
"completions/max_length": 815.0,
"completions/max_terminated_length": 815.0,
"completions/mean_length": 178.43115234375,
"completions/mean_terminated_length": 178.65952758789064,
"completions/min_length": 29.8,
"completions/min_terminated_length": 68.6,
"epoch": 0.64,
"grad_norm": 0.000993815716356039,
"learning_rate": 1e-06,
"loss": -0.0005,
"num_tokens": 666277455.0,
"reward": 0.942539393901825,
"reward_std": 0.08385625034570694,
"rewards/accuracy_reward": 0.57353515625,
"rewards/brier_reward": 0.7822496891021729,
"rewards/confidence_uniqueness_reward": 0.9578921079635621,
"rewards/format_reward": 0.9986328125,
"rewards/frontier_aurc_reward": -0.002722199750132859,
"rewards/frontier_ece_reward": 0.005280413199216127,
"rewards/frontier_entropy_batch_reward": -0.18149682581424714,
"rewards/volume_coverage_0": 5.83049909741451e-10,
"rewards/volume_coverage_1": 5.83049909741451e-10,
"rewards/volume_coverage_10": 4.8380893513666475e-09,
"rewards/volume_coverage_15": 1.38987459408213e-07,
"rewards/volume_coverage_20": 2.436090110791156e-06,
"rewards/volume_coverage_25": 0.0009665171091910452,
"rewards/volume_coverage_5": 5.83049909741451e-10,
"signal/accuracy_reward/centered_abs_mean": 0.078619384765625,
"signal/accuracy_reward/group_std_mean": 0.10908448547124863,
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0393096923828125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0393096923828125,
"signal/advantage_abs_mean": 0.06242771372199059,
"signal/advantage_pre_scale_abs_mean": 0.06242771372199059,
"signal/advantage_pre_scale_std": 0.10776360332965851,
"signal/advantage_std": 0.10776360332965851,
"signal/brier_reward/centered_abs_mean": 0.13804030120372773,
"signal/brier_reward/group_std_mean": 0.17795804738998414,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013804030977189541,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013804030977189541,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01370534636080265,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018724654987454414,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001370534673333168,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001370534673333168,
"signal/format_reward/centered_abs_mean": 0.00230712890625,
"signal/format_reward/group_std_mean": 0.0046893797349184755,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001153564453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001153564453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00299143441952765,
"signal/frontier_aurc_reward/group_std_mean": 0.005100049264729023,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7392930607893504e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7392930607893504e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.009928003698587418,
"signal/frontier_ece_reward/group_std_mean": 0.013508135452866554,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009928003652021288,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009928003652021288,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25994506776332854,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3403321862220764,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02599450834095478,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02599450834095478,
"signal/volume_coverage_0/centered_abs_mean": 2.0802533140518166e-09,
"signal/volume_coverage_0/group_std_mean": 2.721802350169611e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.90625,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.080253476421934e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 2.080253476421934e-10,
"signal/volume_coverage_1/centered_abs_mean": 2.0802533140518166e-09,
"signal/volume_coverage_1/group_std_mean": 2.721802350169611e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.90625,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.080253476421934e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 2.080253476421934e-10,
"signal/volume_coverage_10/centered_abs_mean": 1.7485952596985042e-08,
"signal/volume_coverage_10/group_std_mean": 2.2394617360532722e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.478125,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.7485952463758282e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.7485952463758282e-09,
"signal/volume_coverage_15/centered_abs_mean": 2.2916896256219843e-07,
"signal/volume_coverage_15/group_std_mean": 2.9144772639710936e-07,
"signal/volume_coverage_15/group_zero_std_frac": 0.2625,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.2916897002289715e-08,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.2916897002289715e-08,
"signal/volume_coverage_20/centered_abs_mean": 5.1552106924646065e-06,
"signal/volume_coverage_20/group_std_mean": 6.58480396396044e-06,
"signal/volume_coverage_20/group_zero_std_frac": 0.05,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 5.155210658358556e-07,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 5.155210658358556e-07,
"signal/volume_coverage_25/centered_abs_mean": 0.001954718097113073,
"signal/volume_coverage_25/group_std_mean": 0.002557957172393799,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00019547181436792017,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.00019547181436792017,
"signal/volume_coverage_5/centered_abs_mean": 2.0802533140518166e-09,
"signal/volume_coverage_5/group_std_mean": 2.721802350169611e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.90625,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.080253476421934e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.080253476421934e-10,
"step": 200
},
{
"epoch": 0.64,
"eval_calibration/aurc": 0.48648042753111,
"eval_calibration/batch_distribution_entropy": 0.9412787594127898,
"eval_calibration/buffer_distribution_entropy": 0.9993122469934742,
"eval_calibration/confidence_entropy": 0.496019188313124,
"eval_calibration/coverage@0%": 0.078125,
"eval_calibration/coverage@1%": 0.078125,
"eval_calibration/coverage@10%": 0.078125,
"eval_calibration/coverage@15%": 0.0859375,
"eval_calibration/coverage@20%": 0.15625,
"eval_calibration/coverage@25%": 0.2109375,
"eval_calibration/coverage@30%": 0.2265625,
"eval_calibration/coverage@5%": 0.078125,
"eval_calibration/ece": 0.21468968819672685,
"eval_calibration/mean_confidence": 0.4715669516536699,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 364.5,
"eval_completions/max_terminated_length": 364.5,
"eval_completions/mean_length": 180.03556060791016,
"eval_completions/mean_terminated_length": 180.03556060791016,
"eval_completions/min_length": 92.5,
"eval_completions/min_terminated_length": 92.5,
"eval_loss": 0.0,
"eval_num_tokens": 666277455.0,
"eval_reward": 0.7850509434938431,
"eval_reward_std": 0.2481500282883644,
"eval_rewards/accuracy_reward": 0.431640625,
"eval_rewards/brier_reward": 0.7836132198572159,
"eval_rewards/confidence_uniqueness_reward": 0.901123046875,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.003661160182673484,
"eval_rewards/frontier_ece_reward": 0.00562399672344327,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_rewards/volume_coverage_0": 3.885341458287339e-10,
"eval_rewards/volume_coverage_1": 3.885341458287339e-10,
"eval_rewards/volume_coverage_10": 5.619555665248299e-08,
"eval_rewards/volume_coverage_15": 4.643091244815878e-07,
"eval_rewards/volume_coverage_20": 1.3747317098022904e-05,
"eval_rewards/volume_coverage_25": 0.0023892930475994945,
"eval_rewards/volume_coverage_5": 6.120992632890898e-09,
"eval_runtime": 19.7879,
"eval_samples_per_second": 25.268,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4705810546875,
"eval_signal/accuracy_reward/group_std_mean": 0.4921695739030838,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23529052734375,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23529052734375,
"eval_signal/advantage_abs_mean": 0.23600686714053154,
"eval_signal/advantage_pre_scale_abs_mean": 0.23600686714053154,
"eval_signal/advantage_pre_scale_std": 0.24546074494719505,
"eval_signal/advantage_std": 0.24546074494719505,
"eval_signal/brier_reward/centered_abs_mean": 0.18876836448907852,
"eval_signal/brier_reward/group_std_mean": 0.23586371541023254,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01887683616951108,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01887683616951108,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0405426025390625,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04792775306850672,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004054260440170765,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004054260440170765,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004725108272396028,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008816197630949318,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.9063855587737635e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.9063855587737635e-05,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.011614769231528044,
"eval_signal/frontier_ece_reward/group_std_mean": 0.01629130309447646,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011614770046435297,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011614770046435297,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/volume_coverage_0/centered_abs_mean": 8.46114164843037e-10,
"eval_signal/volume_coverage_0/group_std_mean": 1.0500743954922065e-09,
"eval_signal/volume_coverage_0/group_zero_std_frac": 1.0,
"eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.4611418132291e-11,
"eval_signal/volume_coverage_0/weight": 0.10000000149011612,
"eval_signal/volume_coverage_0/weighted_centered_abs_mean": 8.4611418132291e-11,
"eval_signal/volume_coverage_1/centered_abs_mean": 8.46114164843037e-10,
"eval_signal/volume_coverage_1/group_std_mean": 1.0500743954922065e-09,
"eval_signal/volume_coverage_1/group_zero_std_frac": 1.0,
"eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.4611418132291e-11,
"eval_signal/volume_coverage_1/weight": 0.10000000149011612,
"eval_signal/volume_coverage_1/weighted_centered_abs_mean": 8.4611418132291e-11,
"eval_signal/volume_coverage_10/centered_abs_mean": 1.3369631712834007e-07,
"eval_signal/volume_coverage_10/group_std_mean": 1.6589476459216712e-07,
"eval_signal/volume_coverage_10/group_zero_std_frac": 0.4375,
"eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.3369631957083072e-08,
"eval_signal/volume_coverage_10/weight": 0.10000000149011612,
"eval_signal/volume_coverage_10/weighted_centered_abs_mean": 1.3369631957083072e-08,
"eval_signal/volume_coverage_15/centered_abs_mean": 8.858086886220917e-07,
"eval_signal/volume_coverage_15/group_std_mean": 1.1126764576374626e-06,
"eval_signal/volume_coverage_15/group_zero_std_frac": 0.1875,
"eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 8.858087241492285e-08,
"eval_signal/volume_coverage_15/weight": 0.10000000149011612,
"eval_signal/volume_coverage_15/weighted_centered_abs_mean": 8.858087241492285e-08,
"eval_signal/volume_coverage_20/centered_abs_mean": 2.2470571821031626e-05,
"eval_signal/volume_coverage_20/group_std_mean": 2.80036192634725e-05,
"eval_signal/volume_coverage_20/group_zero_std_frac": 0.125,
"eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.247057153681453e-06,
"eval_signal/volume_coverage_20/weight": 0.10000000149011612,
"eval_signal/volume_coverage_20/weighted_centered_abs_mean": 2.247057153681453e-06,
"eval_signal/volume_coverage_25/centered_abs_mean": 0.004138121090363711,
"eval_signal/volume_coverage_25/group_std_mean": 0.005272858194075525,
"eval_signal/volume_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00041381209302926436,
"eval_signal/volume_coverage_25/weight": 0.10000000149011612,
"eval_signal/volume_coverage_25/weighted_centered_abs_mean": 0.00041381209302926436,
"eval_signal/volume_coverage_5/centered_abs_mean": 2.835196968864473e-08,
"eval_signal/volume_coverage_5/group_std_mean": 3.607782748243915e-08,
"eval_signal/volume_coverage_5/group_zero_std_frac": 0.875,
"eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.8351969587163406e-09,
"eval_signal/volume_coverage_5/weight": 0.10000000149011612,
"eval_signal/volume_coverage_5/weighted_centered_abs_mean": 2.8351969587163406e-09,
"eval_steps_per_second": 0.202,
"step": 200
},
{
"calibration/aurc": 0.4352268779511471,
"calibration/batch_distribution_entropy": 0.9830507741058137,
"calibration/buffer_distribution_entropy": 0.9993089567781432,
"calibration/confidence_entropy": 0.5284715032781411,
"calibration/coverage@0%": 0.008203125,
"calibration/coverage@1%": 0.008203125,
"calibration/coverage@10%": 0.008203125,
"calibration/coverage@15%": 0.01171875,
"calibration/coverage@20%": 0.01484375,
"calibration/coverage@25%": 0.07109375,
"calibration/coverage@30%": 0.205078125,
"calibration/coverage@5%": 0.008203125,
"calibration/ece": 0.11390982277395512,
"calibration/mean_confidence": 0.480846441143625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 488.0,
"completions/max_terminated_length": 488.0,
"completions/mean_length": 179.42890625,
"completions/mean_terminated_length": 179.46363220214843,
"completions/min_length": 57.6,
"completions/min_terminated_length": 73.4,
"epoch": 0.656,
"grad_norm": 0.0011551964562386274,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 682971351.0,
"reward": 0.9070303916931153,
"reward_std": 0.09220799803733826,
"rewards/accuracy_reward": 0.5052734375,
"rewards/brier_reward": 0.7661949157714844,
"rewards/confidence_uniqueness_reward": 0.9564722537994385,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.003637386718764901,
"rewards/frontier_ece_reward": 0.0046428360510617495,
"rewards/frontier_entropy_batch_reward": -0.18354730010032655,
"rewards/volume_coverage_0": 1.1100982744149945e-09,
"rewards/volume_coverage_1": 1.1100982744149945e-09,
"rewards/volume_coverage_10": 4.797209829376925e-09,
"rewards/volume_coverage_15": 1.8364521581215598e-07,
"rewards/volume_coverage_20": 8.764892163526383e-06,
"rewards/volume_coverage_25": 0.0015963076613843441,
"rewards/volume_coverage_5": 1.0403670203107397e-09,
"signal/accuracy_reward/centered_abs_mean": 0.097314453125,
"signal/accuracy_reward/group_std_mean": 0.13193988651037217,
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0486572265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0486572265625,
"signal/advantage_abs_mean": 0.07022999972105026,
"signal/advantage_pre_scale_abs_mean": 0.07022999972105026,
"signal/advantage_pre_scale_std": 0.11693431288003922,
"signal/advantage_std": 0.11693431288003922,
"signal/brier_reward/centered_abs_mean": 0.1503700226545334,
"signal/brier_reward/group_std_mean": 0.19127426743507386,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015037002786993981,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015037002786993981,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012323895655572414,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016047840379178523,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001232389616779983,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001232389616779983,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036699477583169936,
"signal/frontier_aurc_reward/group_std_mean": 0.006296676304191351,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.587434959830716e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.587434959830716e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.009865659102797509,
"signal/frontier_ece_reward/group_std_mean": 0.013996114954352379,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00098656591726467,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00098656591726467,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2666574031114578,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34437201619148256,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026665739342570306,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026665739342570306,
"signal/volume_coverage_0/centered_abs_mean": 3.058716169235254e-09,
"signal/volume_coverage_0/group_std_mean": 3.978690621408987e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.871875,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 3.058716224746405e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 3.058716224746405e-10,
"signal/volume_coverage_1/centered_abs_mean": 3.058716169235254e-09,
"signal/volume_coverage_1/group_std_mean": 3.978690621408987e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.871875,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 3.058716224746405e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 3.058716224746405e-10,
"signal/volume_coverage_10/centered_abs_mean": 1.4398968239959231e-08,
"signal/volume_coverage_10/group_std_mean": 1.8700468018550964e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.575,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.4398968462003837e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.4398968462003837e-09,
"signal/volume_coverage_15/centered_abs_mean": 2.995924269555417e-07,
"signal/volume_coverage_15/group_std_mean": 3.894604368781529e-07,
"signal/volume_coverage_15/group_zero_std_frac": 0.275,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.995924384130433e-08,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.995924384130433e-08,
"signal/volume_coverage_20/centered_abs_mean": 1.7284190471400505e-05,
"signal/volume_coverage_20/group_std_mean": 2.2454723512055354e-05,
"signal/volume_coverage_20/group_zero_std_frac": 0.10625,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.7284191017097327e-06,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.7284191017097327e-06,
"signal/volume_coverage_25/centered_abs_mean": 0.0025885120034217835,
"signal/volume_coverage_25/group_std_mean": 0.0033385612536221743,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00025885119393933567,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.00025885119393933567,
"signal/volume_coverage_5/centered_abs_mean": 3.518249025447062e-09,
"signal/volume_coverage_5/group_std_mean": 4.586072410717179e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.865625,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 3.518249136469365e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 3.518249136469365e-10,
"step": 205
},
{
"calibration/aurc": 0.32527705960723285,
"calibration/batch_distribution_entropy": 0.9799286460754233,
"calibration/buffer_distribution_entropy": 0.9991726792626041,
"calibration/confidence_entropy": 0.5010712565281534,
"calibration/coverage@0%": 0.005086533757338552,
"calibration/coverage@1%": 0.005086533757338552,
"calibration/coverage@10%": 0.07893835616438356,
"calibration/coverage@15%": 0.14262934197651664,
"calibration/coverage@20%": 0.2079195205479452,
"calibration/coverage@25%": 0.30212818003913894,
"calibration/coverage@30%": 0.37953002690802345,
"calibration/coverage@5%": 0.005086533757338552,
"calibration/ece": 0.14933391241023902,
"calibration/mean_confidence": 0.521799676701359,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00068359375,
"completions/max_length": 613.4,
"completions/max_terminated_length": 613.4,
"completions/mean_length": 179.85869140625,
"completions/mean_terminated_length": 179.9818878173828,
"completions/min_length": 15.8,
"completions/min_terminated_length": 76.6,
"epoch": 0.672,
"grad_norm": 0.0010782018071040511,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 699726544.0,
"reward": 0.9170631289482116,
"reward_std": 0.08810421824455261,
"rewards/accuracy_reward": 0.5203125,
"rewards/brier_reward": 0.7815946936607361,
"rewards/confidence_uniqueness_reward": 0.9560160875320435,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0033685142174363137,
"rewards/frontier_ece_reward": 0.005748637113720178,
"rewards/frontier_entropy_batch_reward": -0.17214102149009705,
"rewards/volume_coverage_0": 2.913457092246219e-09,
"rewards/volume_coverage_1": 2.913457092246219e-09,
"rewards/volume_coverage_10": 2.614499892139577e-08,
"rewards/volume_coverage_15": 4.295584156821519e-07,
"rewards/volume_coverage_20": 1.1378515046089887e-05,
"rewards/volume_coverage_25": 0.001677690027281642,
"rewards/volume_coverage_5": 9.1560294845916e-09,
"signal/accuracy_reward/centered_abs_mean": 0.0943603515625,
"signal/accuracy_reward/group_std_mean": 0.12397949695587158,
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04718017578125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04718017578125,
"signal/advantage_abs_mean": 0.06749112159013748,
"signal/advantage_pre_scale_abs_mean": 0.06749112159013748,
"signal/advantage_pre_scale_std": 0.11356084197759628,
"signal/advantage_std": 0.11356084197759628,
"signal/brier_reward/centered_abs_mean": 0.14855161011219026,
"signal/brier_reward/group_std_mean": 0.18973374664783477,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014855161309242249,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014855161309242249,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01256434228271246,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018024000525474548,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012564342236146332,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012564342236146332,
"signal/format_reward/centered_abs_mean": 0.001324462890625,
"signal/format_reward/group_std_mean": 0.0038669900968670845,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036973343696445225,
"signal/frontier_aurc_reward/group_std_mean": 0.006400900986045599,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.621668122126721e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.621668122126721e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.010354111157357692,
"signal/frontier_ece_reward/group_std_mean": 0.013915826939046383,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010354111203923822,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010354111203923822,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24957755506038665,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3306098520755768,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02495775669813156,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02495775669813156,
"signal/volume_coverage_0/centered_abs_mean": 6.486050097009866e-09,
"signal/volume_coverage_0/group_std_mean": 8.15934981801547e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.728125,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.486050529996845e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 6.486050529996845e-10,
"signal/volume_coverage_1/centered_abs_mean": 6.486050097009866e-09,
"signal/volume_coverage_1/group_std_mean": 8.15934981801547e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.728125,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.486050529996845e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 6.486050529996845e-10,
"signal/volume_coverage_10/centered_abs_mean": 4.016737840117912e-08,
"signal/volume_coverage_10/group_std_mean": 5.21149178922542e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.64375,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.016737653600444e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 4.016737653600444e-09,
"signal/volume_coverage_15/centered_abs_mean": 5.667033235567942e-07,
"signal/volume_coverage_15/group_std_mean": 7.309852406933714e-07,
"signal/volume_coverage_15/group_zero_std_frac": 0.45,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.66703334925478e-08,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 5.66703334925478e-08,
"signal/volume_coverage_20/centered_abs_mean": 2.5757231196621432e-05,
"signal/volume_coverage_20/group_std_mean": 3.245171283197124e-05,
"signal/volume_coverage_20/group_zero_std_frac": 0.15625,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.5757231924217195e-06,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 2.5757231924217195e-06,
"signal/volume_coverage_25/centered_abs_mean": 0.0032109246589243413,
"signal/volume_coverage_25/group_std_mean": 0.004136029817163944,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00032109246822074054,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.00032109246822074054,
"signal/volume_coverage_5/centered_abs_mean": 1.5452855794961805e-08,
"signal/volume_coverage_5/group_std_mean": 1.9889450797450082e-08,
"signal/volume_coverage_5/group_zero_std_frac": 0.70625,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.5452855883779647e-09,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.5452855883779647e-09,
"step": 210
},
{
"calibration/aurc": 0.3602656739699735,
"calibration/batch_distribution_entropy": 0.9861478235373793,
"calibration/buffer_distribution_entropy": 0.9991490683063098,
"calibration/confidence_entropy": 0.518639065678005,
"calibration/coverage@0%": 0.001171875,
"calibration/coverage@1%": 0.001171875,
"calibration/coverage@10%": 0.01484375,
"calibration/coverage@15%": 0.112890625,
"calibration/coverage@20%": 0.191015625,
"calibration/coverage@25%": 0.41484375,
"calibration/coverage@30%": 0.6168075980392157,
"calibration/coverage@5%": 0.001171875,
"calibration/ece": 0.15347873798188744,
"calibration/mean_confidence": 0.5023286296324256,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 547.2,
"completions/max_terminated_length": 547.2,
"completions/mean_length": 182.27578125,
"completions/mean_terminated_length": 182.3482238769531,
"completions/min_length": 30.8,
"completions/min_terminated_length": 74.8,
"epoch": 0.688,
"grad_norm": 0.0012053457321599126,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 716546968.0,
"reward": 0.9287741422653198,
"reward_std": 0.09164600521326065,
"rewards/accuracy_reward": 0.545703125,
"rewards/brier_reward": 0.7708859205245971,
"rewards/confidence_uniqueness_reward": 0.9557458996772766,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0031304874457418917,
"rewards/frontier_ece_reward": 0.004407191788777709,
"rewards/frontier_entropy_batch_reward": -0.17154249548912048,
"rewards/volume_coverage_0": 3.516108260104289e-09,
"rewards/volume_coverage_1": 3.516108260104289e-09,
"rewards/volume_coverage_10": 1.6128615110133637e-08,
"rewards/volume_coverage_15": 1.4867726569889327e-07,
"rewards/volume_coverage_20": 1.796638493942737e-05,
"rewards/volume_coverage_25": 0.0020555023336783053,
"rewards/volume_coverage_5": 9.340963502690869e-09,
"signal/accuracy_reward/centered_abs_mean": 0.1008544921875,
"signal/accuracy_reward/group_std_mean": 0.13470993041992188,
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05042724609375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05042724609375,
"signal/advantage_abs_mean": 0.0695977509021759,
"signal/advantage_pre_scale_abs_mean": 0.0695977509021759,
"signal/advantage_pre_scale_std": 0.11726142615079879,
"signal/advantage_std": 0.11726142615079879,
"signal/brier_reward/centered_abs_mean": 0.1450573042035103,
"signal/brier_reward/group_std_mean": 0.18608674705028533,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014505730383098126,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014505730383098126,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012379896081984042,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01644364632666111,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012379896361380816,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012379896361380816,
"signal/format_reward/centered_abs_mean": 0.00074462890625,
"signal/format_reward/group_std_mean": 0.0018734002020210027,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0033159184269607065,
"signal/frontier_aurc_reward/group_std_mean": 0.005820685159415006,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1448980846325866e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1448980846325866e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.00935722216963768,
"signal/frontier_ece_reward/group_std_mean": 0.012687020935118198,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009357222355902195,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009357222355902195,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25185816884040835,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3297829031944275,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02518581636250019,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02518581636250019,
"signal/volume_coverage_0/centered_abs_mean": 5.15811207080219e-09,
"signal/volume_coverage_0/group_std_mean": 6.73305793341683e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.75625,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.158112159620032e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 5.158112159620032e-10,
"signal/volume_coverage_1/centered_abs_mean": 5.15811207080219e-09,
"signal/volume_coverage_1/group_std_mean": 6.73305793341683e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.75625,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.158112159620032e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 5.158112159620032e-10,
"signal/volume_coverage_10/centered_abs_mean": 2.6287222354426375e-08,
"signal/volume_coverage_10/group_std_mean": 3.41128814085323e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.653125,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.62872226208799e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.62872226208799e-09,
"signal/volume_coverage_15/centered_abs_mean": 2.7471078709595533e-07,
"signal/volume_coverage_15/group_std_mean": 3.5181790281058056e-07,
"signal/volume_coverage_15/group_zero_std_frac": 0.3,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.7471080343843824e-08,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.7471080343843824e-08,
"signal/volume_coverage_20/centered_abs_mean": 2.58131702139508e-05,
"signal/volume_coverage_20/group_std_mean": 3.3654694198048675e-05,
"signal/volume_coverage_20/group_zero_std_frac": 0.05,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.5813170623223414e-06,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 2.5813170623223414e-06,
"signal/volume_coverage_25/centered_abs_mean": 0.003704306995496154,
"signal/volume_coverage_25/group_std_mean": 0.004811380803585052,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.000370430713519454,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.000370430713519454,
"signal/volume_coverage_5/centered_abs_mean": 1.2779765867776404e-08,
"signal/volume_coverage_5/group_std_mean": 1.6574323069562525e-08,
"signal/volume_coverage_5/group_zero_std_frac": 0.73125,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.2779766911386047e-09,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.2779766911386047e-09,
"step": 215
},
{
"calibration/aurc": 0.25712697833674014,
"calibration/batch_distribution_entropy": 0.985372593939745,
"calibration/buffer_distribution_entropy": 0.9989997082680914,
"calibration/confidence_entropy": 0.5112285743157771,
"calibration/coverage@0%": 0.001953889432485323,
"calibration/coverage@1%": 0.001953889432485323,
"calibration/coverage@10%": 0.027735139432485324,
"calibration/coverage@15%": 0.18099544398238748,
"calibration/coverage@20%": 0.32993364726027397,
"calibration/coverage@25%": 0.5253309992661448,
"calibration/coverage@30%": 0.7226340814579256,
"calibration/coverage@5%": 0.001953889432485323,
"calibration/ece": 0.1029719488611335,
"calibration/mean_confidence": 0.518194728545559,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00068359375,
"completions/max_length": 581.8,
"completions/max_terminated_length": 581.8,
"completions/mean_length": 181.2587890625,
"completions/mean_terminated_length": 181.38422546386718,
"completions/min_length": 13.0,
"completions/min_terminated_length": 76.4,
"epoch": 0.704,
"grad_norm": 0.00116161466576159,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 733269202.0,
"reward": 0.9273841381072998,
"reward_std": 0.08398017287254333,
"rewards/accuracy_reward": 0.54111328125,
"rewards/brier_reward": 0.7831794381141662,
"rewards/confidence_uniqueness_reward": 0.9567205667495727,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0031599897425621747,
"rewards/frontier_ece_reward": 0.004753571469336748,
"rewards/frontier_entropy_batch_reward": -0.17446809411048889,
"rewards/volume_coverage_0": 2.014733241351152e-09,
"rewards/volume_coverage_1": 2.014733241351152e-09,
"rewards/volume_coverage_10": 2.636177316439614e-08,
"rewards/volume_coverage_15": 9.60477851918995e-08,
"rewards/volume_coverage_20": 1.1378265980965807e-05,
"rewards/volume_coverage_25": 0.0018908534664660692,
"rewards/volume_coverage_5": 6.5962154960352845e-09,
"signal/accuracy_reward/centered_abs_mean": 0.083428955078125,
"signal/accuracy_reward/group_std_mean": 0.11187773495912552,
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0417144775390625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0417144775390625,
"signal/advantage_abs_mean": 0.06403366848826408,
"signal/advantage_pre_scale_abs_mean": 0.06403366848826408,
"signal/advantage_pre_scale_std": 0.10933973640203476,
"signal/advantage_std": 0.10933973640203476,
"signal/brier_reward/centered_abs_mean": 0.1387535125017166,
"signal/brier_reward/group_std_mean": 0.17746146619319916,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013875351287424564,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013875351287424564,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012523720599710941,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01720440424978733,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012523720972239972,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012523720972239972,
"signal/format_reward/centered_abs_mean": 0.001287841796875,
"signal/format_reward/group_std_mean": 0.0031351604498922824,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006439208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006439208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0034087890293449164,
"signal/frontier_aurc_reward/group_std_mean": 0.005871927179396152,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.260986315784976e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.260986315784976e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.009251984581351281,
"signal/frontier_ece_reward/group_std_mean": 0.012797567993402481,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009251985000446438,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009251985000446438,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.252427738904953,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33083915114402773,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025242774933576583,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025242774933576583,
"signal/volume_coverage_0/centered_abs_mean": 5.998387475614209e-09,
"signal/volume_coverage_0/group_std_mean": 7.584478556310614e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.765625,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.998387564432051e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 5.998387564432051e-10,
"signal/volume_coverage_1/centered_abs_mean": 5.998387475614209e-09,
"signal/volume_coverage_1/group_std_mean": 7.584478556310614e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.765625,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.998387564432051e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 5.998387564432051e-10,
"signal/volume_coverage_10/centered_abs_mean": 5.079258258433583e-08,
"signal/volume_coverage_10/group_std_mean": 6.501357034949251e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.5375,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 5.079258391660346e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 5.079258391660346e-09,
"signal/volume_coverage_15/centered_abs_mean": 2.757137721687286e-07,
"signal/volume_coverage_15/group_std_mean": 3.5040081058923533e-07,
"signal/volume_coverage_15/group_zero_std_frac": 0.340625,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.757137842479551e-08,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.757137842479551e-08,
"signal/volume_coverage_20/centered_abs_mean": 3.360342743690126e-05,
"signal/volume_coverage_20/group_std_mean": 4.258807239239104e-05,
"signal/volume_coverage_20/group_zero_std_frac": 0.15,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.3603427255002315e-06,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 3.3603427255002315e-06,
"signal/volume_coverage_25/centered_abs_mean": 0.003729742532595992,
"signal/volume_coverage_25/group_std_mean": 0.004862392600625753,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0003729742602445185,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0003729742602445185,
"signal/volume_coverage_5/centered_abs_mean": 1.4848486706853237e-08,
"signal/volume_coverage_5/group_std_mean": 1.8887312158710756e-08,
"signal/volume_coverage_5/group_zero_std_frac": 0.728125,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.484848732857813e-09,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.484848732857813e-09,
"step": 220
},
{
"calibration/aurc": 0.26982895429920506,
"calibration/batch_distribution_entropy": 0.9920044552136869,
"calibration/buffer_distribution_entropy": 0.9989689610022271,
"calibration/confidence_entropy": 0.5077218693296869,
"calibration/coverage@0%": 0.009765625,
"calibration/coverage@1%": 0.009765625,
"calibration/coverage@10%": 0.146484375,
"calibration/coverage@15%": 0.301171875,
"calibration/coverage@20%": 0.380078125,
"calibration/coverage@25%": 0.487109375,
"calibration/coverage@30%": 0.6015625,
"calibration/coverage@5%": 0.02109375,
"calibration/ece": 0.13914586113741415,
"calibration/mean_confidence": 0.5164801231632847,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 691.8,
"completions/max_terminated_length": 691.8,
"completions/mean_length": 181.493359375,
"completions/mean_terminated_length": 181.58330993652345,
"completions/min_length": 46.4,
"completions/min_terminated_length": 77.0,
"epoch": 0.72,
"grad_norm": 0.001211289200000465,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 750137550.0,
"reward": 0.9408392310142517,
"reward_std": 0.08822100460529328,
"rewards/accuracy_reward": 0.56865234375,
"rewards/brier_reward": 0.7844376921653747,
"rewards/confidence_uniqueness_reward": 0.956852662563324,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0027199994772672655,
"rewards/frontier_ece_reward": 0.004829889070242644,
"rewards/frontier_entropy_batch_reward": -0.18062590062618256,
"rewards/volume_coverage_0": 2.3184011668786474e-09,
"rewards/volume_coverage_1": 2.3184011668786474e-09,
"rewards/volume_coverage_10": 2.2449454442630133e-08,
"rewards/volume_coverage_15": 3.639436144453612e-07,
"rewards/volume_coverage_20": 1.3974024386698148e-05,
"rewards/volume_coverage_25": 0.002402997249737382,
"rewards/volume_coverage_5": 4.878357745541351e-09,
"signal/accuracy_reward/centered_abs_mean": 0.092950439453125,
"signal/accuracy_reward/group_std_mean": 0.12576421201229096,
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0464752197265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0464752197265625,
"signal/advantage_abs_mean": 0.06602133065462112,
"signal/advantage_pre_scale_abs_mean": 0.06602133065462112,
"signal/advantage_pre_scale_std": 0.11225824356079102,
"signal/advantage_std": 0.11225824356079102,
"signal/brier_reward/centered_abs_mean": 0.13668132722377777,
"signal/brier_reward/group_std_mean": 0.1750232219696045,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013668132573366165,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013668132573366165,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012708039209246635,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017119022272527217,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012708039255812764,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012708039255812764,
"signal/format_reward/centered_abs_mean": 0.000933837890625,
"signal/format_reward/group_std_mean": 0.002425827318802476,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003009202517569065,
"signal/frontier_aurc_reward/group_std_mean": 0.005182502605021,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.761503321584314e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.761503321584314e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.009087760373950005,
"signal/frontier_ece_reward/group_std_mean": 0.012493956461548805,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009087760234251618,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009087760234251618,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2623806893825531,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3368456959724426,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02623806968331337,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02623806968331337,
"signal/volume_coverage_0/centered_abs_mean": 5.69514373438551e-09,
"signal/volume_coverage_0/group_std_mean": 7.23545801051273e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.76875,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.695143467931985e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 5.695143467931985e-10,
"signal/volume_coverage_1/centered_abs_mean": 5.69514373438551e-09,
"signal/volume_coverage_1/group_std_mean": 7.23545801051273e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.76875,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.695143467931985e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 5.695143467931985e-10,
"signal/volume_coverage_10/centered_abs_mean": 6.64894066915167e-08,
"signal/volume_coverage_10/group_std_mean": 8.501251524961617e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.53125,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 6.6489410199821465e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 6.6489410199821465e-09,
"signal/volume_coverage_15/centered_abs_mean": 4.1270519659519776e-07,
"signal/volume_coverage_15/group_std_mean": 5.180600197718377e-07,
"signal/volume_coverage_15/group_zero_std_frac": 0.30625,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 4.1270518913449904e-08,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 4.1270518913449904e-08,
"signal/volume_coverage_20/centered_abs_mean": 4.9445707554696126e-05,
"signal/volume_coverage_20/group_std_mean": 6.335518482956105e-05,
"signal/volume_coverage_20/group_zero_std_frac": 0.1,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 4.94457085551403e-06,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 4.94457085551403e-06,
"signal/volume_coverage_25/centered_abs_mean": 0.004809588473290205,
"signal/volume_coverage_25/group_std_mean": 0.006234651803970337,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0004809588834177703,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0004809588834177703,
"signal/volume_coverage_5/centered_abs_mean": 1.8328495698938242e-08,
"signal/volume_coverage_5/group_std_mean": 2.352717913822744e-08,
"signal/volume_coverage_5/group_zero_std_frac": 0.746875,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.832849616523191e-09,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.832849616523191e-09,
"step": 225
},
{
"calibration/aurc": 0.2828847264622761,
"calibration/batch_distribution_entropy": 0.9849187010660106,
"calibration/buffer_distribution_entropy": 0.9990935703753154,
"calibration/confidence_entropy": 0.49405605315020173,
"calibration/coverage@0%": 0.005859375,
"calibration/coverage@1%": 0.005859375,
"calibration/coverage@10%": 0.05859375,
"calibration/coverage@15%": 0.121875,
"calibration/coverage@20%": 0.391015625,
"calibration/coverage@25%": 0.50078125,
"calibration/coverage@30%": 0.611328125,
"calibration/coverage@5%": 0.005859375,
"calibration/ece": 0.13080733854407284,
"calibration/mean_confidence": 0.5411869919490867,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 672.8,
"completions/max_terminated_length": 672.8,
"completions/mean_length": 181.01396484375,
"completions/mean_terminated_length": 181.0318176269531,
"completions/min_length": 59.2,
"completions/min_terminated_length": 75.6,
"epoch": 0.736,
"grad_norm": 0.001337669906206429,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 766930717.0,
"reward": 0.9395357966423035,
"reward_std": 0.08629318326711655,
"rewards/accuracy_reward": 0.55966796875,
"rewards/brier_reward": 0.7854518890380859,
"rewards/confidence_uniqueness_reward": 0.9589093208312989,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.003115908848121762,
"rewards/frontier_ece_reward": 0.005211750976741314,
"rewards/frontier_entropy_batch_reward": -0.15451081991195678,
"rewards/volume_coverage_0": 2.006316779379347e-09,
"rewards/volume_coverage_1": 2.006316779379347e-09,
"rewards/volume_coverage_10": 1.695187847872148e-08,
"rewards/volume_coverage_15": 9.291495466356991e-08,
"rewards/volume_coverage_20": 5.661431605403777e-05,
"rewards/volume_coverage_25": 0.003265319438651204,
"rewards/volume_coverage_5": 5.1611755313984984e-09,
"signal/accuracy_reward/centered_abs_mean": 0.091168212890625,
"signal/accuracy_reward/group_std_mean": 0.12514048665761948,
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0455841064453125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0455841064453125,
"signal/advantage_abs_mean": 0.06469556018710136,
"signal/advantage_pre_scale_abs_mean": 0.06469556018710136,
"signal/advantage_pre_scale_std": 0.10994518399238587,
"signal/advantage_std": 0.10994518399238587,
"signal/brier_reward/centered_abs_mean": 0.14189725518226623,
"signal/brier_reward/group_std_mean": 0.18289188742637635,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014189725369215011,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014189725369215011,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011633879318833352,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015210827626287938,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001163387973792851,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001163387973792851,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0034561751410365103,
"signal/frontier_aurc_reward/group_std_mean": 0.005828452110290527,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.320219159126282e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.320219159126282e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.009344759956002235,
"signal/frontier_ece_reward/group_std_mean": 0.01285859029740095,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009344760212115944,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009344760212115944,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23316074311733245,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.306648051738739,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023316074162721634,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023316074162721634,
"signal/volume_coverage_0/centered_abs_mean": 5.230916322318535e-09,
"signal/volume_coverage_0/group_std_mean": 6.707384425652663e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.771875,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.230916477749759e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 5.230916477749759e-10,
"signal/volume_coverage_1/centered_abs_mean": 5.230916322318535e-09,
"signal/volume_coverage_1/group_std_mean": 6.707384425652663e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.771875,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.230916477749759e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 5.230916477749759e-10,
"signal/volume_coverage_10/centered_abs_mean": 6.583693021866565e-08,
"signal/volume_coverage_10/group_std_mean": 8.430675393356069e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.471875,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 6.583693457073991e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 6.583693457073991e-09,
"signal/volume_coverage_15/centered_abs_mean": 3.5697502198672737e-07,
"signal/volume_coverage_15/group_std_mean": 4.58353112264831e-07,
"signal/volume_coverage_15/group_zero_std_frac": 0.20625,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.569749988940884e-08,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 3.569749988940884e-08,
"signal/volume_coverage_20/centered_abs_mean": 0.00013909742992836981,
"signal/volume_coverage_20/group_std_mean": 0.00017906517896335573,
"signal/volume_coverage_20/group_zero_std_frac": 0.009375,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.390974321111571e-05,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.390974321111571e-05,
"signal/volume_coverage_25/centered_abs_mean": 0.005795108247548341,
"signal/volume_coverage_25/group_std_mean": 0.007614379748702049,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0005795108270831406,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0005795108270831406,
"signal/volume_coverage_5/centered_abs_mean": 1.3240050122931279e-08,
"signal/volume_coverage_5/group_std_mean": 1.6939196179066583e-08,
"signal/volume_coverage_5/group_zero_std_frac": 0.665625,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.3240050056317898e-09,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.3240050056317898e-09,
"step": 230
},
{
"calibration/aurc": 0.3052104139588967,
"calibration/batch_distribution_entropy": 0.9845980873959596,
"calibration/buffer_distribution_entropy": 0.9992672107871774,
"calibration/confidence_entropy": 0.481453651763142,
"calibration/coverage@0%": 0.005867783757338552,
"calibration/coverage@1%": 0.005867783757338552,
"calibration/coverage@10%": 0.02501605308219178,
"calibration/coverage@15%": 0.0840913955479452,
"calibration/coverage@20%": 0.28422975782778864,
"calibration/coverage@25%": 0.4041883255870841,
"calibration/coverage@30%": 0.5483564701565558,
"calibration/coverage@5%": 0.005867783757338552,
"calibration/ece": 0.13235262642024298,
"calibration/mean_confidence": 0.4699991623427877,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 519.8,
"completions/max_terminated_length": 519.8,
"completions/mean_length": 183.3912109375,
"completions/mean_terminated_length": 183.4627258300781,
"completions/min_length": 17.0,
"completions/min_terminated_length": 80.6,
"epoch": 0.752,
"grad_norm": 0.0008947931928560138,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 784035843.0,
"reward": 0.937898600101471,
"reward_std": 0.08305409550666809,
"rewards/accuracy_reward": 0.56201171875,
"rewards/brier_reward": 0.7806243419647216,
"rewards/confidence_uniqueness_reward": 0.956540560722351,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0032437034416943787,
"rewards/frontier_ece_reward": 0.0044742335565388204,
"rewards/frontier_entropy_batch_reward": -0.17429540455341339,
"rewards/volume_coverage_0": 4.7879359588165474e-09,
"rewards/volume_coverage_1": 4.7879359588165474e-09,
"rewards/volume_coverage_10": 8.599188774383038e-08,
"rewards/volume_coverage_15": 3.474765513544753e-07,
"rewards/volume_coverage_20": 7.103228454070631e-05,
"rewards/volume_coverage_25": 0.003870873898267746,
"rewards/volume_coverage_5": 1.2054030928965177e-08,
"signal/accuracy_reward/centered_abs_mean": 0.083892822265625,
"signal/accuracy_reward/group_std_mean": 0.11160431355237961,
"signal/accuracy_reward/group_zero_std_frac": 0.678125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0419464111328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0419464111328125,
"signal/advantage_abs_mean": 0.06367235779762268,
"signal/advantage_pre_scale_abs_mean": 0.06367235779762268,
"signal/advantage_pre_scale_std": 0.10790289640426635,
"signal/advantage_std": 0.10790289640426635,
"signal/brier_reward/centered_abs_mean": 0.1400133416056633,
"signal/brier_reward/group_std_mean": 0.17920613586902617,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014001334644854068,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014001334644854068,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012570606358349324,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016902846470475196,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012570605846121906,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012570605846121906,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003417500341311097,
"signal/frontier_aurc_reward/group_std_mean": 0.005739410500973463,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.271875586709939e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.271875586709939e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.009020579047501087,
"signal/frontier_ece_reward/group_std_mean": 0.012551595270633698,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009020578931085765,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009020578931085765,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2512552380561829,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32768315076828003,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025125524401664732,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025125524401664732,
"signal/volume_coverage_0/centered_abs_mean": 7.3125370647630915e-09,
"signal/volume_coverage_0/group_std_mean": 9.442500292777822e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.690625,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.312537242398776e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 7.312537242398776e-10,
"signal/volume_coverage_1/centered_abs_mean": 7.3125370647630915e-09,
"signal/volume_coverage_1/group_std_mean": 9.442500292777822e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.690625,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.312537242398776e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 7.312537242398776e-10,
"signal/volume_coverage_10/centered_abs_mean": 1.515987818834219e-07,
"signal/volume_coverage_10/group_std_mean": 1.9257330734490098e-07,
"signal/volume_coverage_10/group_zero_std_frac": 0.4625,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.5159878508086422e-08,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.5159878508086422e-08,
"signal/volume_coverage_15/centered_abs_mean": 6.380775175784948e-07,
"signal/volume_coverage_15/group_std_mean": 8.159376591265754e-07,
"signal/volume_coverage_15/group_zero_std_frac": 0.296875,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 6.380775232628367e-08,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 6.380775232628367e-08,
"signal/volume_coverage_20/centered_abs_mean": 0.00014088547613937407,
"signal/volume_coverage_20/group_std_mean": 0.0001808962260838598,
"signal/volume_coverage_20/group_zero_std_frac": 0.0125,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.4088547322899103e-05,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.4088547322899103e-05,
"signal/volume_coverage_25/centered_abs_mean": 0.00589982932433486,
"signal/volume_coverage_25/group_std_mean": 0.0076646491885185245,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0005899829440750182,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0005899829440750182,
"signal/volume_coverage_5/centered_abs_mean": 2.8907768445662896e-08,
"signal/volume_coverage_5/group_std_mean": 3.686327421092983e-08,
"signal/volume_coverage_5/group_zero_std_frac": 0.6125,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.8907769289432395e-09,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.8907769289432395e-09,
"step": 235
},
{
"calibration/aurc": 0.28967973925663304,
"calibration/batch_distribution_entropy": 0.9882842527096105,
"calibration/buffer_distribution_entropy": 0.999540690858549,
"calibration/confidence_entropy": 0.5116336247799427,
"calibration/coverage@0%": 0.014069379892367905,
"calibration/coverage@1%": 0.014069379892367905,
"calibration/coverage@10%": 0.19336701932485323,
"calibration/coverage@15%": 0.2898513943248532,
"calibration/coverage@20%": 0.36487509173189825,
"calibration/coverage@25%": 0.441084423923679,
"calibration/coverage@30%": 0.5059434625733855,
"calibration/coverage@5%": 0.06680375489236791,
"calibration/ece": 0.1683775357806439,
"calibration/mean_confidence": 0.48505974704758303,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 718.4,
"completions/max_terminated_length": 718.4,
"completions/mean_length": 191.35693359375,
"completions/mean_terminated_length": 191.43230590820312,
"completions/min_length": 49.4,
"completions/min_terminated_length": 83.8,
"epoch": 0.768,
"grad_norm": 0.0010617813095450401,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 800928042.0,
"reward": 0.9159645438194275,
"reward_std": 0.08468157351016999,
"rewards/accuracy_reward": 0.516015625,
"rewards/brier_reward": 0.7906257748603821,
"rewards/confidence_uniqueness_reward": 0.954690670967102,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0030185111332684754,
"rewards/frontier_ece_reward": 0.004940245300531387,
"rewards/frontier_entropy_batch_reward": -0.17311182320117952,
"rewards/volume_coverage_0": 6.364088045440752e-09,
"rewards/volume_coverage_1": 6.364088045440752e-09,
"rewards/volume_coverage_10": 2.1145389084153977e-08,
"rewards/volume_coverage_15": 3.079553749785191e-07,
"rewards/volume_coverage_20": 8.821834267109807e-05,
"rewards/volume_coverage_25": 0.004664366459473967,
"rewards/volume_coverage_5": 1.2051903697241073e-08,
"signal/accuracy_reward/centered_abs_mean": 0.08985595703125,
"signal/accuracy_reward/group_std_mean": 0.11778665035963058,
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044927978515625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044927978515625,
"signal/advantage_abs_mean": 0.06518116667866707,
"signal/advantage_pre_scale_abs_mean": 0.06518116667866707,
"signal/advantage_pre_scale_std": 0.11050616502761841,
"signal/advantage_std": 0.11050616502761841,
"signal/brier_reward/centered_abs_mean": 0.13591494858264924,
"signal/brier_reward/group_std_mean": 0.17277559041976928,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013591495528817176,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013591495528817176,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012128811329603195,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01648256555199623,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012128812028095125,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012128812028095125,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00310793062672019,
"signal/frontier_aurc_reward/group_std_mean": 0.005243441369384527,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.884913166984916e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.884913166984916e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.008205472864210606,
"signal/frontier_ece_reward/group_std_mean": 0.011111721023917198,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008205473190173507,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008205473190173507,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24350886940956115,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3188900947570801,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024350887164473534,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024350887164473534,
"signal/volume_coverage_0/centered_abs_mean": 1.2351764944185106e-08,
"signal/volume_coverage_0/group_std_mean": 1.568862941780935e-08,
"signal/volume_coverage_0/group_zero_std_frac": 0.55625,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.2351765832363526e-09,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.2351765832363526e-09,
"signal/volume_coverage_1/centered_abs_mean": 1.2351764944185106e-08,
"signal/volume_coverage_1/group_std_mean": 1.568862941780935e-08,
"signal/volume_coverage_1/group_zero_std_frac": 0.55625,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.2351765832363526e-09,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.2351765832363526e-09,
"signal/volume_coverage_10/centered_abs_mean": 8.648569576052978e-08,
"signal/volume_coverage_10/group_std_mean": 1.1136818187651442e-07,
"signal/volume_coverage_10/group_zero_std_frac": 0.403125,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 8.64856977145223e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 8.64856977145223e-09,
"signal/volume_coverage_15/centered_abs_mean": 5.247908120509237e-07,
"signal/volume_coverage_15/group_std_mean": 6.701182428514585e-07,
"signal/volume_coverage_15/group_zero_std_frac": 0.23125,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.2479082057743655e-08,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 5.2479082057743655e-08,
"signal/volume_coverage_20/centered_abs_mean": 0.0001288706494960934,
"signal/volume_coverage_20/group_std_mean": 0.00016244519356405361,
"signal/volume_coverage_20/group_zero_std_frac": 0.090625,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.2887064895039656e-05,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.2887064895039656e-05,
"signal/volume_coverage_25/centered_abs_mean": 0.00626561539247632,
"signal/volume_coverage_25/group_std_mean": 0.008005017414689064,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0006265615345910191,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0006265615345910191,
"signal/volume_coverage_5/centered_abs_mean": 2.908500817966342e-08,
"signal/volume_coverage_5/group_std_mean": 3.7089595750217085e-08,
"signal/volume_coverage_5/group_zero_std_frac": 0.521875,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.9085007291485e-09,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.9085007291485e-09,
"step": 240
},
{
"calibration/aurc": 0.3409073022681162,
"calibration/batch_distribution_entropy": 0.9818591637152853,
"calibration/buffer_distribution_entropy": 0.999580250636168,
"calibration/confidence_entropy": 0.4921108300165389,
"calibration/coverage@0%": 0.000390625,
"calibration/coverage@1%": 0.000390625,
"calibration/coverage@10%": 0.14609375,
"calibration/coverage@15%": 0.23587840741650296,
"calibration/coverage@20%": 0.29508610633595284,
"calibration/coverage@25%": 0.3417247360019647,
"calibration/coverage@30%": 0.39851731335952845,
"calibration/coverage@5%": 0.0671875,
"calibration/ece": 0.14462367827428624,
"calibration/mean_confidence": 0.5428684503415782,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 606.2,
"completions/max_terminated_length": 606.2,
"completions/mean_length": 193.3060546875,
"completions/mean_terminated_length": 193.4197784423828,
"completions/min_length": 34.2,
"completions/min_terminated_length": 87.4,
"epoch": 0.784,
"grad_norm": 0.0009081112220883369,
"learning_rate": 1e-06,
"loss": -0.0003,
"num_tokens": 818081864.0,
"reward": 0.9324936270713806,
"reward_std": 0.08699233829975128,
"rewards/accuracy_reward": 0.55498046875,
"rewards/brier_reward": 0.7820330500602722,
"rewards/confidence_uniqueness_reward": 0.9537600040435791,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.003351992089301348,
"rewards/frontier_ece_reward": 0.00407652840949595,
"rewards/frontier_entropy_batch_reward": -0.19070782661437988,
"rewards/volume_coverage_0": 3.5432023204329256e-09,
"rewards/volume_coverage_1": 3.5432023204329256e-09,
"rewards/volume_coverage_10": 2.968028498528952e-08,
"rewards/volume_coverage_15": 3.8283018959361926e-07,
"rewards/volume_coverage_20": 0.00011098165159637575,
"rewards/volume_coverage_25": 0.004109382582828403,
"rewards/volume_coverage_5": 1.2307213759221724e-08,
"signal/accuracy_reward/centered_abs_mean": 0.086224365234375,
"signal/accuracy_reward/group_std_mean": 0.1191135048866272,
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0431121826171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0431121826171875,
"signal/advantage_abs_mean": 0.06541964039206505,
"signal/advantage_pre_scale_abs_mean": 0.06541964039206505,
"signal/advantage_pre_scale_std": 0.11036005318164825,
"signal/advantage_std": 0.11036005318164825,
"signal/brier_reward/centered_abs_mean": 0.1302357941865921,
"signal/brier_reward/group_std_mean": 0.16782908141613007,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013023579306900502,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013023579306900502,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012619849853217601,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016879118233919143,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012619849760085345,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012619849760085345,
"signal/format_reward/centered_abs_mean": 0.0010986328125,
"signal/format_reward/group_std_mean": 0.0025827332865446806,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00054931640625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00054931640625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003462395863607526,
"signal/frontier_aurc_reward/group_std_mean": 0.00611389996483922,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.327995120547712e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.327995120547712e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.007947401888668538,
"signal/frontier_ece_reward/group_std_mean": 0.010621737688779831,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007947401842102409,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007947401842102409,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2670396983623505,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34181196689605714,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02670396976172924,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02670396976172924,
"signal/volume_coverage_0/centered_abs_mean": 7.350568687058967e-09,
"signal/volume_coverage_0/group_std_mean": 9.562578107136233e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.65,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.350568487218823e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 7.350568487218823e-10,
"signal/volume_coverage_1/centered_abs_mean": 7.350568687058967e-09,
"signal/volume_coverage_1/group_std_mean": 9.562578107136233e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.65,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.350568487218823e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 7.350568487218823e-10,
"signal/volume_coverage_10/centered_abs_mean": 4.7587561269324394e-08,
"signal/volume_coverage_10/group_std_mean": 6.18870934943061e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.534375,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.758756055878166e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 4.758756055878166e-09,
"signal/volume_coverage_15/centered_abs_mean": 9.245440821814555e-07,
"signal/volume_coverage_15/group_std_mean": 1.2132376781437415e-06,
"signal/volume_coverage_15/group_zero_std_frac": 0.15,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 9.245441106031649e-08,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 9.245441106031649e-08,
"signal/volume_coverage_20/centered_abs_mean": 0.00030603163759224117,
"signal/volume_coverage_20/group_std_mean": 0.00039949056517798456,
"signal/volume_coverage_20/group_zero_std_frac": 0.05,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.0603163759224115e-05,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 3.0603163759224115e-05,
"signal/volume_coverage_25/centered_abs_mean": 0.006639927253127098,
"signal/volume_coverage_25/group_std_mean": 0.008674658834934235,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0006639927276410162,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0006639927276410162,
"signal/volume_coverage_5/centered_abs_mean": 1.897898034286527e-08,
"signal/volume_coverage_5/group_std_mean": 2.4780702645443853e-08,
"signal/volume_coverage_5/group_zero_std_frac": 0.571875,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.8978980609318796e-09,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.8978980609318796e-09,
"step": 245
},
{
"calibration/aurc": 0.2528686613631735,
"calibration/batch_distribution_entropy": 0.9800658866295919,
"calibration/buffer_distribution_entropy": 0.9994979177014567,
"calibration/confidence_entropy": 0.5019152426365978,
"calibration/coverage@0%": 0.004694416007288473,
"calibration/coverage@1%": 0.004694416007288473,
"calibration/coverage@10%": 0.12131790929280242,
"calibration/coverage@15%": 0.26051909660693096,
"calibration/coverage@20%": 0.44419644060973545,
"calibration/coverage@25%": 0.5879300805507188,
"calibration/coverage@30%": 0.6937708383091669,
"calibration/coverage@5%": 0.004694416007288473,
"calibration/ece": 0.1269755624021652,
"calibration/mean_confidence": 0.4990723125611093,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 592.6,
"completions/max_terminated_length": 592.6,
"completions/mean_length": 199.9748046875,
"completions/mean_terminated_length": 200.09163818359374,
"completions/min_length": 55.4,
"completions/min_terminated_length": 89.6,
"epoch": 0.8,
"grad_norm": 0.0012367883464321494,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 835140166.0,
"reward": 0.9509989142417907,
"reward_std": 0.08965336829423905,
"rewards/accuracy_reward": 0.593359375,
"rewards/brier_reward": 0.7926764488220215,
"rewards/confidence_uniqueness_reward": 0.9519682645797729,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0032140606548637153,
"rewards/frontier_ece_reward": 0.0039047694765031336,
"rewards/frontier_entropy_batch_reward": -0.20638387203216552,
"rewards/volume_coverage_0": 2.2559438539460076e-09,
"rewards/volume_coverage_1": 2.2559438539460076e-09,
"rewards/volume_coverage_10": 4.5241598911616165e-08,
"rewards/volume_coverage_15": 9.52254220010218e-07,
"rewards/volume_coverage_20": 0.00027118420985061675,
"rewards/volume_coverage_25": 0.004574334062635898,
"rewards/volume_coverage_5": 7.028363846472985e-09,
"signal/accuracy_reward/centered_abs_mean": 0.0971435546875,
"signal/accuracy_reward/group_std_mean": 0.12513308376073837,
"signal/accuracy_reward/group_zero_std_frac": 0.65,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04857177734375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04857177734375,
"signal/advantage_abs_mean": 0.06999209970235824,
"signal/advantage_pre_scale_abs_mean": 0.06999209970235824,
"signal/advantage_pre_scale_std": 0.11670937389135361,
"signal/advantage_std": 0.11670937389135361,
"signal/brier_reward/centered_abs_mean": 0.1270011395215988,
"signal/brier_reward/group_std_mean": 0.16291393637657164,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012700113654136657,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012700113654136657,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013134096190333367,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01759086810052395,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013134096516296268,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013134096516296268,
"signal/format_reward/centered_abs_mean": 0.001251220703125,
"signal/format_reward/group_std_mean": 0.002707315143197775,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006256103515625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006256103515625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003635000390931964,
"signal/frontier_aurc_reward/group_std_mean": 0.006264658644795418,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.543750619632192e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.543750619632192e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.007841948978602886,
"signal/frontier_ece_reward/group_std_mean": 0.010542181693017483,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007841949234716594,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007841949234716594,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.274287748336792,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34808642268180845,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027428776770830155,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027428776770830155,
"signal/volume_coverage_0/centered_abs_mean": 7.121829703748972e-09,
"signal/volume_coverage_0/group_std_mean": 9.051382132696518e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.73125,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.121830125633721e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 7.121830125633721e-10,
"signal/volume_coverage_1/centered_abs_mean": 7.121829703748972e-09,
"signal/volume_coverage_1/group_std_mean": 9.051382132696518e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.73125,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.121830125633721e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 7.121830125633721e-10,
"signal/volume_coverage_10/centered_abs_mean": 1.477469595556613e-07,
"signal/volume_coverage_10/group_std_mean": 1.8689709406771727e-07,
"signal/volume_coverage_10/group_zero_std_frac": 0.446875,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.4774697110198076e-08,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.4774697110198076e-08,
"signal/volume_coverage_15/centered_abs_mean": 2.0268829473479854e-06,
"signal/volume_coverage_15/group_std_mean": 2.5734173163982634e-06,
"signal/volume_coverage_15/group_zero_std_frac": 0.290625,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.026883009875746e-07,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.026883009875746e-07,
"signal/volume_coverage_20/centered_abs_mean": 0.0006177243252750486,
"signal/volume_coverage_20/group_std_mean": 0.000787645042873919,
"signal/volume_coverage_20/group_zero_std_frac": 0.25,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 6.177243412821553e-05,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 6.177243412821553e-05,
"signal/volume_coverage_25/centered_abs_mean": 0.007191289030015468,
"signal/volume_coverage_25/group_std_mean": 0.009311573766171932,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0007191289332695305,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0007191289332695305,
"signal/volume_coverage_5/centered_abs_mean": 2.6008427056467555e-08,
"signal/volume_coverage_5/group_std_mean": 3.3191895454365294e-08,
"signal/volume_coverage_5/group_zero_std_frac": 0.703125,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.6008427411738922e-09,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.6008427411738922e-09,
"step": 250
},
{
"epoch": 0.8,
"eval_calibration/aurc": 0.4908944294407222,
"eval_calibration/batch_distribution_entropy": 0.9348605742427996,
"eval_calibration/buffer_distribution_entropy": 0.9995711557006871,
"eval_calibration/confidence_entropy": 0.505417142366378,
"eval_calibration/coverage@0%": 0.0546875,
"eval_calibration/coverage@1%": 0.0546875,
"eval_calibration/coverage@10%": 0.0546875,
"eval_calibration/coverage@15%": 0.0859375,
"eval_calibration/coverage@20%": 0.1015625,
"eval_calibration/coverage@25%": 0.1875,
"eval_calibration/coverage@30%": 0.1875,
"eval_calibration/coverage@5%": 0.0546875,
"eval_calibration/ece": 0.2639847418086436,
"eval_calibration/mean_confidence": 0.4755017740367686,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 741.5,
"eval_completions/max_terminated_length": 741.5,
"eval_completions/mean_length": 218.63106155395508,
"eval_completions/mean_terminated_length": 218.63106155395508,
"eval_completions/min_length": 108.25,
"eval_completions/min_terminated_length": 108.25,
"eval_loss": 0.0,
"eval_num_tokens": 835140166.0,
"eval_reward": 0.7909766435623169,
"eval_reward_std": 0.2465236335992813,
"eval_rewards/accuracy_reward": 0.447265625,
"eval_rewards/brier_reward": 0.7693072855472565,
"eval_rewards/confidence_uniqueness_reward": 0.89453125,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.004427299543749541,
"eval_rewards/frontier_ece_reward": 0.003629253071267158,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_rewards/volume_coverage_0": 6.361557280953889e-09,
"eval_rewards/volume_coverage_1": 6.361557280953889e-09,
"eval_rewards/volume_coverage_10": 6.214037906460135e-08,
"eval_rewards/volume_coverage_15": 2.913196333054202e-07,
"eval_rewards/volume_coverage_20": 0.0006583120702998713,
"eval_rewards/volume_coverage_25": 0.005864958860911429,
"eval_rewards/volume_coverage_5": 1.7527957307805764e-08,
"eval_runtime": 31.2892,
"eval_samples_per_second": 15.98,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4781494140625,
"eval_signal/accuracy_reward/group_std_mean": 0.49640604108572006,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23907470703125,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23907470703125,
"eval_signal/advantage_abs_mean": 0.23595283553004265,
"eval_signal/advantage_pre_scale_abs_mean": 0.23595283553004265,
"eval_signal/advantage_pre_scale_std": 0.24372952803969383,
"eval_signal/advantage_std": 0.24372952803969383,
"eval_signal/brier_reward/centered_abs_mean": 0.19793446362018585,
"eval_signal/brier_reward/group_std_mean": 0.24987414851784706,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019793446641415358,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019793446641415358,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0426177978515625,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.049744920805096626,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004261779831722379,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004261779831722379,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.006218503811396658,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.013023799983784556,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.773129982524551e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.773129982524551e-05,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.010007256641983986,
"eval_signal/frontier_ece_reward/group_std_mean": 0.013597892131656408,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010007255914388224,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010007255914388224,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/volume_coverage_0/centered_abs_mean": 1.7614849578961866e-08,
"eval_signal/volume_coverage_0/group_std_mean": 2.2101196428536696e-08,
"eval_signal/volume_coverage_0/group_zero_std_frac": 0.5,
"eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.7614848746294598e-09,
"eval_signal/volume_coverage_0/weight": 0.10000000149011612,
"eval_signal/volume_coverage_0/weighted_centered_abs_mean": 1.7614848746294598e-09,
"eval_signal/volume_coverage_1/centered_abs_mean": 1.7614849578961866e-08,
"eval_signal/volume_coverage_1/group_std_mean": 2.2101196428536696e-08,
"eval_signal/volume_coverage_1/group_zero_std_frac": 0.5,
"eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.7614848746294598e-09,
"eval_signal/volume_coverage_1/weight": 0.10000000149011612,
"eval_signal/volume_coverage_1/weighted_centered_abs_mean": 1.7614848746294598e-09,
"eval_signal/volume_coverage_10/centered_abs_mean": 1.73200039199628e-07,
"eval_signal/volume_coverage_10/group_std_mean": 2.176570355061358e-07,
"eval_signal/volume_coverage_10/group_zero_std_frac": 0.5,
"eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.732000431964309e-08,
"eval_signal/volume_coverage_10/weight": 0.10000000149011612,
"eval_signal/volume_coverage_10/weighted_centered_abs_mean": 1.732000431964309e-08,
"eval_signal/volume_coverage_15/centered_abs_mean": 1.1045231005368805e-06,
"eval_signal/volume_coverage_15/group_std_mean": 1.4119637938847518e-06,
"eval_signal/volume_coverage_15/group_zero_std_frac": 0.5,
"eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.1045231707029757e-07,
"eval_signal/volume_coverage_15/weight": 0.10000000149011612,
"eval_signal/volume_coverage_15/weighted_centered_abs_mean": 1.1045231707029757e-07,
"eval_signal/volume_coverage_20/centered_abs_mean": 0.0016010731924325228,
"eval_signal/volume_coverage_20/group_std_mean": 0.002093737944960594,
"eval_signal/volume_coverage_20/group_zero_std_frac": 0.125,
"eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00016010730905691162,
"eval_signal/volume_coverage_20/weight": 0.10000000149011612,
"eval_signal/volume_coverage_20/weighted_centered_abs_mean": 0.00016010730905691162,
"eval_signal/volume_coverage_25/centered_abs_mean": 0.013395349029451609,
"eval_signal/volume_coverage_25/group_std_mean": 0.018266907893121243,
"eval_signal/volume_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.001339534908765927,
"eval_signal/volume_coverage_25/weight": 0.10000000149011612,
"eval_signal/volume_coverage_25/weighted_centered_abs_mean": 0.001339534908765927,
"eval_signal/volume_coverage_5/centered_abs_mean": 4.967059652472017e-08,
"eval_signal/volume_coverage_5/group_std_mean": 6.237662741881422e-08,
"eval_signal/volume_coverage_5/group_zero_std_frac": 0.5,
"eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.967059707983168e-09,
"eval_signal/volume_coverage_5/weight": 0.10000000149011612,
"eval_signal/volume_coverage_5/weighted_centered_abs_mean": 4.967059707983168e-09,
"eval_steps_per_second": 0.128,
"step": 250
},
{
"calibration/aurc": 0.23735285794686112,
"calibration/batch_distribution_entropy": 0.9789768078562158,
"calibration/buffer_distribution_entropy": 0.999559891129499,
"calibration/confidence_entropy": 0.49745628597302466,
"calibration/coverage@0%": 0.02659242099883121,
"calibration/coverage@1%": 0.02659242099883121,
"calibration/coverage@10%": 0.10261541338586462,
"calibration/coverage@15%": 0.28655421229032024,
"calibration/coverage@20%": 0.469679150414842,
"calibration/coverage@25%": 0.5976479013159413,
"calibration/coverage@30%": 0.7012977345284872,
"calibration/coverage@5%": 0.05675342910295696,
"calibration/ece": 0.1411352071071634,
"calibration/mean_confidence": 0.5183968192355468,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 833.8,
"completions/max_terminated_length": 833.8,
"completions/mean_length": 212.0490234375,
"completions/mean_terminated_length": 212.17300720214843,
"completions/min_length": 17.0,
"completions/min_terminated_length": 87.0,
"epoch": 0.816,
"grad_norm": 0.0010728145716711879,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 852410716.0,
"reward": 0.9503415584564209,
"reward_std": 0.09045878946781158,
"rewards/accuracy_reward": 0.58798828125,
"rewards/brier_reward": 0.7771391987800598,
"rewards/confidence_uniqueness_reward": 0.9539396047592164,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.0030888376757502558,
"rewards/frontier_ece_reward": 0.0029682864900678397,
"rewards/frontier_entropy_batch_reward": -0.17172586619853974,
"rewards/volume_coverage_0": 1.3925749109411355e-09,
"rewards/volume_coverage_1": 1.3925749109411355e-09,
"rewards/volume_coverage_10": 1.897763094449445e-08,
"rewards/volume_coverage_15": 4.220935450049978e-08,
"rewards/volume_coverage_20": 0.00028864066698588433,
"rewards/volume_coverage_25": 0.004179897159337998,
"rewards/volume_coverage_5": 1.9938326212276535e-09,
"signal/accuracy_reward/centered_abs_mean": 0.099285888671875,
"signal/accuracy_reward/group_std_mean": 0.13266663253307343,
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0496429443359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0496429443359375,
"signal/advantage_abs_mean": 0.06852349787950515,
"signal/advantage_pre_scale_abs_mean": 0.06852349787950515,
"signal/advantage_pre_scale_std": 0.11623869091272354,
"signal/advantage_std": 0.11623869091272354,
"signal/brier_reward/centered_abs_mean": 0.1350185066461563,
"signal/brier_reward/group_std_mean": 0.1722848892211914,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013501851074397563,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013501851074397563,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01146103423088789,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015553070977330209,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011461034882813692,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011461034882813692,
"signal/format_reward/centered_abs_mean": 0.0010986328125,
"signal/format_reward/group_std_mean": 0.0025827332865446806,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00054931640625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00054931640625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0035511314868927,
"signal/frontier_aurc_reward/group_std_mean": 0.00631262669339776,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.438914693309926e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.438914693309926e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.007486023474484682,
"signal/frontier_ece_reward/group_std_mean": 0.010013842955231667,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007486023707315326,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007486023707315326,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24344446063041686,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3181091547012329,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024344447255134582,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024344447255134582,
"signal/volume_coverage_0/centered_abs_mean": 7.336063845286844e-09,
"signal/volume_coverage_0/group_std_mean": 9.247960708336222e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.675,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.336064200558212e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 7.336064200558212e-10,
"signal/volume_coverage_1/centered_abs_mean": 7.336063845286844e-09,
"signal/volume_coverage_1/group_std_mean": 9.247960708336222e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.675,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.336064200558212e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 7.336064200558212e-10,
"signal/volume_coverage_10/centered_abs_mean": 9.209922708919294e-08,
"signal/volume_coverage_10/group_std_mean": 1.172154071582554e-07,
"signal/volume_coverage_10/group_zero_std_frac": 0.509375,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 9.209923401698461e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 9.209923401698461e-09,
"signal/volume_coverage_15/centered_abs_mean": 2.9229093740923416e-07,
"signal/volume_coverage_15/group_std_mean": 3.7030814752370135e-07,
"signal/volume_coverage_15/group_zero_std_frac": 0.5,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.9229093456706325e-08,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.9229093456706325e-08,
"signal/volume_coverage_20/centered_abs_mean": 0.001035915408283472,
"signal/volume_coverage_20/group_std_mean": 0.0013216811465099453,
"signal/volume_coverage_20/group_zero_std_frac": 0.05,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00010359154257457703,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.00010359154257457703,
"signal/volume_coverage_25/centered_abs_mean": 0.008257150836288928,
"signal/volume_coverage_25/group_std_mean": 0.01067999266088009,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0008257150650024414,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0008257150650024414,
"signal/volume_coverage_5/centered_abs_mean": 9.664288569410928e-09,
"signal/volume_coverage_5/group_std_mean": 1.2128600435090675e-08,
"signal/volume_coverage_5/group_zero_std_frac": 0.675,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 9.664288480593085e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 9.664288480593085e-10,
"step": 255
},
{
"calibration/aurc": 0.2834820299595159,
"calibration/batch_distribution_entropy": 0.969581570766635,
"calibration/buffer_distribution_entropy": 0.9996472034772383,
"calibration/confidence_entropy": 0.5025546700203597,
"calibration/coverage@0%": 0.03203125,
"calibration/coverage@1%": 0.03203125,
"calibration/coverage@10%": 0.229296875,
"calibration/coverage@15%": 0.25625,
"calibration/coverage@20%": 0.330859375,
"calibration/coverage@25%": 0.4421875,
"calibration/coverage@30%": 0.560546875,
"calibration/coverage@5%": 0.1625,
"calibration/ece": 0.11813969667517914,
"calibration/mean_confidence": 0.4632866735346511,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 752.4,
"completions/max_terminated_length": 752.4,
"completions/mean_length": 225.29521484375,
"completions/mean_terminated_length": 225.3841583251953,
"completions/min_length": 34.4,
"completions/min_terminated_length": 87.6,
"epoch": 0.832,
"grad_norm": 0.0010744145838543773,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 869726091.0,
"reward": 0.9356490731239319,
"reward_std": 0.08708105832338334,
"rewards/accuracy_reward": 0.5587890625,
"rewards/brier_reward": 0.8003982663154602,
"rewards/confidence_uniqueness_reward": 0.9526532053947449,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.00265838208142668,
"rewards/frontier_ece_reward": 0.0040717648807913065,
"rewards/frontier_entropy_batch_reward": -0.20046012997627258,
"rewards/volume_coverage_0": 5.32470711966937e-09,
"rewards/volume_coverage_1": 5.32470711966937e-09,
"rewards/volume_coverage_10": 2.0827092868103135e-07,
"rewards/volume_coverage_15": 1.1683033221743243e-06,
"rewards/volume_coverage_20": 0.0012323636648943648,
"rewards/volume_coverage_25": 0.006934031657874584,
"rewards/volume_coverage_5": 7.292380654888575e-09,
"signal/accuracy_reward/centered_abs_mean": 0.09027099609375,
"signal/accuracy_reward/group_std_mean": 0.12320152074098586,
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045135498046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045135498046875,
"signal/advantage_abs_mean": 0.06571685001254082,
"signal/advantage_pre_scale_abs_mean": 0.06571685001254082,
"signal/advantage_pre_scale_std": 0.11133622229099274,
"signal/advantage_std": 0.11133622229099274,
"signal/brier_reward/centered_abs_mean": 0.12082481384277344,
"signal/brier_reward/group_std_mean": 0.15506627261638642,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01208248157054186,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01208248157054186,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01293247528374195,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017343126982450486,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012932475423440338,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012932475423440338,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002934473962523043,
"signal/frontier_aurc_reward/group_std_mean": 0.005702351313084364,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6680924677057194e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6680924677057194e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.00730133531615138,
"signal/frontier_ece_reward/group_std_mean": 0.009728312119841575,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007301335572265089,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007301335572265089,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.274827253818512,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3532982707023621,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02748272567987442,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02748272567987442,
"signal/volume_coverage_0/centered_abs_mean": 8.213022972825002e-09,
"signal/volume_coverage_0/group_std_mean": 1.061971328653044e-08,
"signal/volume_coverage_0/group_zero_std_frac": 0.60625,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.213023483527593e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 8.213023483527593e-10,
"signal/volume_coverage_1/centered_abs_mean": 8.213022972825002e-09,
"signal/volume_coverage_1/group_std_mean": 1.061971328653044e-08,
"signal/volume_coverage_1/group_zero_std_frac": 0.60625,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.213023483527593e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 8.213023483527593e-10,
"signal/volume_coverage_10/centered_abs_mean": 3.5972448699794766e-07,
"signal/volume_coverage_10/group_std_mean": 4.666583777179767e-07,
"signal/volume_coverage_10/group_zero_std_frac": 0.4875,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.597244955244605e-08,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 3.597244955244605e-08,
"signal/volume_coverage_15/centered_abs_mean": 2.127487846337317e-06,
"signal/volume_coverage_15/group_std_mean": 2.7779381980508334e-06,
"signal/volume_coverage_15/group_zero_std_frac": 0.48125,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.127487846337317e-07,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.127487846337317e-07,
"signal/volume_coverage_20/centered_abs_mean": 0.0015772737329825758,
"signal/volume_coverage_20/group_std_mean": 0.002045056619681418,
"signal/volume_coverage_20/group_zero_std_frac": 0.153125,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00015772737679071725,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.00015772737679071725,
"signal/volume_coverage_25/centered_abs_mean": 0.008564276993274689,
"signal/volume_coverage_25/group_std_mean": 0.010968778282403946,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0008564276969991625,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0008564276969991625,
"signal/volume_coverage_5/centered_abs_mean": 1.1323550985764541e-08,
"signal/volume_coverage_5/group_std_mean": 1.4639518397530083e-08,
"signal/volume_coverage_5/group_zero_std_frac": 0.575,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.1323551429853751e-09,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.1323551429853751e-09,
"step": 260
},
{
"calibration/aurc": 0.32454142744681747,
"calibration/batch_distribution_entropy": 0.9714130609011755,
"calibration/buffer_distribution_entropy": 0.999706410918014,
"calibration/confidence_entropy": 0.49613739343811575,
"calibration/coverage@0%": 0.0078125,
"calibration/coverage@1%": 0.0078125,
"calibration/coverage@10%": 0.135546875,
"calibration/coverage@15%": 0.25078125,
"calibration/coverage@20%": 0.398046875,
"calibration/coverage@25%": 0.484765625,
"calibration/coverage@30%": 0.5375,
"calibration/coverage@5%": 0.07578125,
"calibration/ece": 0.15647323001271482,
"calibration/mean_confidence": 0.5257162202100306,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00087890625,
"completions/max_length": 836.2,
"completions/max_terminated_length": 836.2,
"completions/mean_length": 232.523046875,
"completions/mean_terminated_length": 232.72797241210938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 97.6,
"epoch": 0.848,
"grad_norm": 0.0009566603694111109,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 887121495.0,
"reward": 0.9239301204681396,
"reward_std": 0.08423375785350799,
"rewards/accuracy_reward": 0.53525390625,
"rewards/brier_reward": 0.7889814138412475,
"rewards/confidence_uniqueness_reward": 0.9528720259666443,
"rewards/format_reward": 0.99912109375,
"rewards/frontier_aurc_reward": -0.003320692852139473,
"rewards/frontier_ece_reward": 0.004457092331722379,
"rewards/frontier_entropy_batch_reward": -0.18649887144565583,
"rewards/volume_coverage_0": 6.994451406949764e-09,
"rewards/volume_coverage_1": 6.994451406949764e-09,
"rewards/volume_coverage_10": 2.6992355071087106e-07,
"rewards/volume_coverage_15": 1.2051786683286992e-06,
"rewards/volume_coverage_20": 0.0009353063651360571,
"rewards/volume_coverage_25": 0.007092976756393909,
"rewards/volume_coverage_5": 1.2135791505052396e-08,
"signal/accuracy_reward/centered_abs_mean": 0.080621337890625,
"signal/accuracy_reward/group_std_mean": 0.11271409392356872,
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0403106689453125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0403106689453125,
"signal/advantage_abs_mean": 0.061996497213840485,
"signal/advantage_pre_scale_abs_mean": 0.061996497213840485,
"signal/advantage_pre_scale_std": 0.10738707631826401,
"signal/advantage_std": 0.10738707631826401,
"signal/brier_reward/centered_abs_mean": 0.1269964024424553,
"signal/brier_reward/group_std_mean": 0.16419816315174102,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012699640169739724,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012699640169739724,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012903736904263496,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018659178167581558,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012903737602755426,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012903737602755426,
"signal/format_reward/centered_abs_mean": 0.001690673828125,
"signal/format_reward/group_std_mean": 0.004635536018759013,
"signal/format_reward/group_zero_std_frac": 0.975,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008453369140625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008453369140625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003700101049616933,
"signal/frontier_aurc_reward/group_std_mean": 0.0065599772147834304,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.625126384780742e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.625126384780742e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.007679888419806958,
"signal/frontier_ece_reward/group_std_mean": 0.010139138251543046,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007679888512939215,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007679888512939215,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25838564336299896,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33443763256073,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025838563591241835,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025838563591241835,
"signal/volume_coverage_0/centered_abs_mean": 9.153766811209606e-09,
"signal/volume_coverage_0/group_std_mean": 1.1882709927135693e-08,
"signal/volume_coverage_0/group_zero_std_frac": 0.6,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 9.153766633573923e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 9.153766633573923e-10,
"signal/volume_coverage_1/centered_abs_mean": 9.153766811209606e-09,
"signal/volume_coverage_1/group_std_mean": 1.1882709927135693e-08,
"signal/volume_coverage_1/group_zero_std_frac": 0.6,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 9.153766633573923e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 9.153766633573923e-10,
"signal/volume_coverage_10/centered_abs_mean": 3.123025010154379e-07,
"signal/volume_coverage_10/group_std_mean": 4.0348030552195267e-07,
"signal/volume_coverage_10/group_zero_std_frac": 0.503125,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.1230250030489516e-08,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 3.1230250030489516e-08,
"signal/volume_coverage_15/centered_abs_mean": 1.6549604197280133e-06,
"signal/volume_coverage_15/group_std_mean": 2.1525930606003386e-06,
"signal/volume_coverage_15/group_zero_std_frac": 0.484375,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.654960442465381e-07,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.654960442465381e-07,
"signal/volume_coverage_20/centered_abs_mean": 0.0012428722577169538,
"signal/volume_coverage_20/group_std_mean": 0.0016125503461807966,
"signal/volume_coverage_20/group_zero_std_frac": 0.1,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00012428723130142316,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.00012428723130142316,
"signal/volume_coverage_25/centered_abs_mean": 0.008850092254579067,
"signal/volume_coverage_25/group_std_mean": 0.011401113867759705,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0008850092417560518,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0008850092417560518,
"signal/volume_coverage_5/centered_abs_mean": 1.5453646540208865e-08,
"signal/volume_coverage_5/group_std_mean": 2.0075406936825858e-08,
"signal/volume_coverage_5/group_zero_std_frac": 0.56875,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.545364636257318e-09,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.545364636257318e-09,
"step": 265
},
{
"calibration/aurc": 0.2793327046643886,
"calibration/batch_distribution_entropy": 0.9744149038280241,
"calibration/buffer_distribution_entropy": 0.9996895191216847,
"calibration/confidence_entropy": 0.5058490737154272,
"calibration/coverage@0%": 0.007038894324853229,
"calibration/coverage@1%": 0.007038894324853229,
"calibration/coverage@10%": 0.06914979818982388,
"calibration/coverage@15%": 0.16645364481409003,
"calibration/coverage@20%": 0.2856630687377691,
"calibration/coverage@25%": 0.41389585371819965,
"calibration/coverage@30%": 0.5526090080724071,
"calibration/coverage@5%": 0.015242019324853228,
"calibration/ece": 0.1423724300679615,
"calibration/mean_confidence": 0.5778247707830445,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.001171875,
"completions/max_length": 719.0,
"completions/max_terminated_length": 719.0,
"completions/mean_length": 234.0359375,
"completions/mean_terminated_length": 234.31072998046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 101.8,
"epoch": 0.864,
"grad_norm": 0.0012890915386378765,
"learning_rate": 1e-06,
"loss": -0.0006,
"num_tokens": 904504839.0,
"reward": 0.9504047274589539,
"reward_std": 0.09197955429553986,
"rewards/accuracy_reward": 0.59326171875,
"rewards/brier_reward": 0.7776164174079895,
"rewards/confidence_uniqueness_reward": 0.9520756244659424,
"rewards/format_reward": 0.998828125,
"rewards/frontier_aurc_reward": -0.003015703801065683,
"rewards/frontier_ece_reward": 0.0034456577152013777,
"rewards/frontier_entropy_batch_reward": -0.1951846957206726,
"rewards/volume_coverage_0": 5.715915030179986e-09,
"rewards/volume_coverage_1": 5.715915030179986e-09,
"rewards/volume_coverage_10": 2.172148666712559e-07,
"rewards/volume_coverage_15": 1.2539674798972555e-06,
"rewards/volume_coverage_20": 0.0006765133934095502,
"rewards/volume_coverage_25": 0.0053439770825207235,
"rewards/volume_coverage_5": 1.0568708574965058e-08,
"signal/accuracy_reward/centered_abs_mean": 0.097515869140625,
"signal/accuracy_reward/group_std_mean": 0.12857878357172012,
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0487579345703125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0487579345703125,
"signal/advantage_abs_mean": 0.07035753056406975,
"signal/advantage_pre_scale_abs_mean": 0.07035753056406975,
"signal/advantage_pre_scale_std": 0.11794359385967254,
"signal/advantage_std": 0.11794359385967254,
"signal/brier_reward/centered_abs_mean": 0.13027719110250474,
"signal/brier_reward/group_std_mean": 0.16806340515613555,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013027719967067242,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013027719967067242,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01354727279394865,
"signal/confidence_uniqueness_reward/group_std_mean": 0.020269707962870597,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013547273352742194,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013547273352742194,
"signal/format_reward/centered_abs_mean": 0.00225830078125,
"signal/format_reward/group_std_mean": 0.0062928176019340755,
"signal/format_reward/group_zero_std_frac": 0.965625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001129150390625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001129150390625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036219977773725986,
"signal/frontier_aurc_reward/group_std_mean": 0.006439856067299843,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5274974399944765e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5274974399944765e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.00775745278224349,
"signal/frontier_ece_reward/group_std_mean": 0.010124932788312436,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007757453131489456,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007757453131489456,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26516912281513216,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34180880784988404,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026516913250088692,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026516913250088692,
"signal/volume_coverage_0/centered_abs_mean": 1.1564463342494945e-08,
"signal/volume_coverage_0/group_std_mean": 1.4757365462969574e-08,
"signal/volume_coverage_0/group_zero_std_frac": 0.590625,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.1564463608948473e-09,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.1564463608948473e-09,
"signal/volume_coverage_1/centered_abs_mean": 1.1564463342494945e-08,
"signal/volume_coverage_1/group_std_mean": 1.4757365462969574e-08,
"signal/volume_coverage_1/group_zero_std_frac": 0.590625,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.1564463608948473e-09,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.1564463608948473e-09,
"signal/volume_coverage_10/centered_abs_mean": 3.7779442152441334e-07,
"signal/volume_coverage_10/group_std_mean": 4.909618496640177e-07,
"signal/volume_coverage_10/group_zero_std_frac": 0.5,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.7779441797169966e-08,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 3.7779441797169966e-08,
"signal/volume_coverage_15/centered_abs_mean": 2.4811426328597007e-06,
"signal/volume_coverage_15/group_std_mean": 3.230066158721456e-06,
"signal/volume_coverage_15/group_zero_std_frac": 0.5,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.4811426442283845e-07,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.4811426442283845e-07,
"signal/volume_coverage_20/centered_abs_mean": 0.0015919221099466086,
"signal/volume_coverage_20/group_std_mean": 0.002070562425069511,
"signal/volume_coverage_20/group_zero_std_frac": 0.15,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00015919221623335035,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.00015919221623335035,
"signal/volume_coverage_25/centered_abs_mean": 0.009017400071024894,
"signal/volume_coverage_25/group_std_mean": 0.011596359312534332,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0009017400327138603,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0009017400327138603,
"signal/volume_coverage_5/centered_abs_mean": 2.0909239140110002e-08,
"signal/volume_coverage_5/group_std_mean": 2.665617309105528e-08,
"signal/volume_coverage_5/group_zero_std_frac": 0.553125,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.0909240561195476e-09,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.0909240561195476e-09,
"step": 270
},
{
"calibration/aurc": 0.3976527827378292,
"calibration/batch_distribution_entropy": 0.9842513601406309,
"calibration/buffer_distribution_entropy": 0.9995667312205757,
"calibration/confidence_entropy": 0.4861670621360091,
"calibration/coverage@0%": 0.01608005359257437,
"calibration/coverage@1%": 0.01608005359257437,
"calibration/coverage@10%": 0.03489596599515953,
"calibration/coverage@15%": 0.05529896639116259,
"calibration/coverage@20%": 0.11558296672613119,
"calibration/coverage@25%": 0.1461229080681779,
"calibration/coverage@30%": 0.2656455270397041,
"calibration/coverage@5%": 0.022366890527741358,
"calibration/ece": 0.15154470595631533,
"calibration/mean_confidence": 0.4808891593691776,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00107421875,
"completions/max_length": 953.6,
"completions/max_terminated_length": 953.6,
"completions/mean_length": 232.0931640625,
"completions/mean_terminated_length": 232.34286193847657,
"completions/min_length": 0.0,
"completions/min_terminated_length": 97.6,
"epoch": 0.88,
"grad_norm": 0.0011567287147045135,
"learning_rate": 1e-06,
"loss": -0.0009,
"num_tokens": 922028545.0,
"reward": 0.9079472064971924,
"reward_std": 0.0888668417930603,
"rewards/accuracy_reward": 0.50888671875,
"rewards/brier_reward": 0.783502197265625,
"rewards/confidence_uniqueness_reward": 0.9519115090370178,
"rewards/format_reward": 0.99892578125,
"rewards/frontier_aurc_reward": -0.003150738077238202,
"rewards/frontier_ece_reward": 0.004228654690086842,
"rewards/frontier_entropy_batch_reward": -0.20748784244060517,
"rewards/volume_coverage_0": 1.1710999814340539e-08,
"rewards/volume_coverage_1": 1.1710999814340539e-08,
"rewards/volume_coverage_10": 2.072748031878291e-07,
"rewards/volume_coverage_15": 1.6568533510508131e-06,
"rewards/volume_coverage_20": 0.0014447305584326386,
"rewards/volume_coverage_25": 0.0072022792883217335,
"rewards/volume_coverage_5": 2.0295714620033322e-08,
"signal/accuracy_reward/centered_abs_mean": 0.092388916015625,
"signal/accuracy_reward/group_std_mean": 0.12169073075056076,
"signal/accuracy_reward/group_zero_std_frac": 0.64375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0461944580078125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0461944580078125,
"signal/advantage_abs_mean": 0.06861607134342193,
"signal/advantage_pre_scale_abs_mean": 0.06861607134342193,
"signal/advantage_pre_scale_std": 0.11503324806690216,
"signal/advantage_std": 0.11503324806690216,
"signal/brier_reward/centered_abs_mean": 0.12623945921659468,
"signal/brier_reward/group_std_mean": 0.16205861270427704,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012623946368694305,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012623946368694305,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013797298818826676,
"signal/confidence_uniqueness_reward/group_std_mean": 0.019599108770489693,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001379729900509119,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001379729900509119,
"signal/format_reward/centered_abs_mean": 0.002032470703125,
"signal/format_reward/group_std_mean": 0.0050085606053471565,
"signal/format_reward/group_zero_std_frac": 0.975,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010162353515625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0010162353515625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003262630198150873,
"signal/frontier_aurc_reward/group_std_mean": 0.005495144985616207,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0782876749290156e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0782876749290156e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.007399953808635474,
"signal/frontier_ece_reward/group_std_mean": 0.009649076312780381,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007399953901767731,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007399953901767731,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.278250652551651,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.353388512134552,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027825065329670905,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027825065329670905,
"signal/volume_coverage_0/centered_abs_mean": 1.2377601166235763e-08,
"signal/volume_coverage_0/group_std_mean": 1.5731156643994382e-08,
"signal/volume_coverage_0/group_zero_std_frac": 0.584375,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.237760094419116e-09,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.237760094419116e-09,
"signal/volume_coverage_1/centered_abs_mean": 1.2377601166235763e-08,
"signal/volume_coverage_1/group_std_mean": 1.5731156643994382e-08,
"signal/volume_coverage_1/group_zero_std_frac": 0.584375,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.237760094419116e-09,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.237760094419116e-09,
"signal/volume_coverage_10/centered_abs_mean": 2.3355193263796535e-07,
"signal/volume_coverage_10/group_std_mean": 2.960320600209343e-07,
"signal/volume_coverage_10/group_zero_std_frac": 0.50625,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.3355193512486493e-08,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.3355193512486493e-08,
"signal/volume_coverage_15/centered_abs_mean": 1.8819132947101025e-06,
"signal/volume_coverage_15/group_std_mean": 2.3752340666760575e-06,
"signal/volume_coverage_15/group_zero_std_frac": 0.5,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.881913277657077e-07,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.881913277657077e-07,
"signal/volume_coverage_20/centered_abs_mean": 0.001760154077783227,
"signal/volume_coverage_20/group_std_mean": 0.0022441008826717735,
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00017601540894247593,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.00017601540894247593,
"signal/volume_coverage_25/centered_abs_mean": 0.00926213078200817,
"signal/volume_coverage_25/group_std_mean": 0.011914961040019989,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00092621308285743,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.00092621308285743,
"signal/volume_coverage_5/centered_abs_mean": 2.1371749170384645e-08,
"signal/volume_coverage_5/group_std_mean": 2.7185818396446847e-08,
"signal/volume_coverage_5/group_zero_std_frac": 0.58125,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.137174881511328e-09,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.137174881511328e-09,
"step": 275
},
{
"calibration/aurc": 0.37601968611273684,
"calibration/batch_distribution_entropy": 0.9797260721921596,
"calibration/buffer_distribution_entropy": 0.9994793642036346,
"calibration/confidence_entropy": 0.5158069074862448,
"calibration/coverage@0%": 0.005078125,
"calibration/coverage@1%": 0.005078125,
"calibration/coverage@10%": 0.046875,
"calibration/coverage@15%": 0.085546875,
"calibration/coverage@20%": 0.155078125,
"calibration/coverage@25%": 0.19921875,
"calibration/coverage@30%": 0.2734375,
"calibration/coverage@5%": 0.005078125,
"calibration/ece": 0.1534649556541345,
"calibration/mean_confidence": 0.5264320305031458,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 600.4,
"completions/max_terminated_length": 600.4,
"completions/mean_length": 228.57685546875,
"completions/mean_terminated_length": 228.68885803222656,
"completions/min_length": 39.8,
"completions/min_terminated_length": 95.0,
"epoch": 0.896,
"grad_norm": 0.0013456126907840371,
"learning_rate": 1e-06,
"loss": -0.0004,
"num_tokens": 939480020.0,
"reward": 0.9279617309570313,
"reward_std": 0.08366731405258179,
"rewards/accuracy_reward": 0.54755859375,
"rewards/brier_reward": 0.7841137886047364,
"rewards/confidence_uniqueness_reward": 0.9525452017784118,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.003130245627835393,
"rewards/frontier_ece_reward": 0.0031632784754037856,
"rewards/frontier_entropy_batch_reward": -0.20224941968917848,
"rewards/volume_coverage_0": 7.1152447711497756e-09,
"rewards/volume_coverage_1": 7.1152447711497756e-09,
"rewards/volume_coverage_10": 7.800173946748146e-08,
"rewards/volume_coverage_15": 2.0716353901661934e-06,
"rewards/volume_coverage_20": 0.001087343692779541,
"rewards/volume_coverage_25": 0.0059946583583951,
"rewards/volume_coverage_5": 1.1443985870052131e-08,
"signal/accuracy_reward/centered_abs_mean": 0.080853271484375,
"signal/accuracy_reward/group_std_mean": 0.11332604438066482,
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0404266357421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0404266357421875,
"signal/advantage_abs_mean": 0.062341734766960144,
"signal/advantage_pre_scale_abs_mean": 0.062341734766960144,
"signal/advantage_pre_scale_std": 0.10656144320964814,
"signal/advantage_std": 0.10656144320964814,
"signal/brier_reward/centered_abs_mean": 0.11827864497900009,
"signal/brier_reward/group_std_mean": 0.15285103023052216,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011827864684164524,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011827864684164524,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01246865913271904,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01718489658087492,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012468658853322268,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012468658853322268,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.002762135770171881,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002935412712395191,
"signal/frontier_aurc_reward/group_std_mean": 0.005062458151951432,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.669265897769947e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.669265897769947e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.00687292842194438,
"signal/frontier_ece_reward/group_std_mean": 0.009145662747323513,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006872928468510508,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006872928468510508,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27478896379470824,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34888529777526855,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02747889719903469,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02747889719903469,
"signal/volume_coverage_0/centered_abs_mean": 7.998229900607613e-09,
"signal/volume_coverage_0/group_std_mean": 1.0271428330099752e-08,
"signal/volume_coverage_0/group_zero_std_frac": 0.71875,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.998230300287901e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 7.998230300287901e-10,
"signal/volume_coverage_1/centered_abs_mean": 7.998229900607613e-09,
"signal/volume_coverage_1/group_std_mean": 1.0271428330099752e-08,
"signal/volume_coverage_1/group_zero_std_frac": 0.71875,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.998230300287901e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 7.998230300287901e-10,
"signal/volume_coverage_10/centered_abs_mean": 8.842737599934481e-08,
"signal/volume_coverage_10/group_std_mean": 1.134970077032449e-07,
"signal/volume_coverage_10/group_zero_std_frac": 0.515625,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 8.842737742043027e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 8.842737742043027e-09,
"signal/volume_coverage_15/centered_abs_mean": 2.146668634850357e-06,
"signal/volume_coverage_15/group_std_mean": 2.754710089902801e-06,
"signal/volume_coverage_15/group_zero_std_frac": 0.4,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.1466687201154854e-07,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.1466687201154854e-07,
"signal/volume_coverage_20/centered_abs_mean": 0.0016437669517472386,
"signal/volume_coverage_20/group_std_mean": 0.0021336987148970364,
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00016437669983133675,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.00016437669983133675,
"signal/volume_coverage_25/centered_abs_mean": 0.008214055374264718,
"signal/volume_coverage_25/group_std_mean": 0.010711676627397537,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0008214055444113911,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0008214055444113911,
"signal/volume_coverage_5/centered_abs_mean": 1.234739954725228e-08,
"signal/volume_coverage_5/group_std_mean": 1.5868885405723178e-08,
"signal/volume_coverage_5/group_zero_std_frac": 0.6875,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.2347399769296885e-09,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.2347399769296885e-09,
"step": 280
},
{
"calibration/aurc": 0.3716300823019802,
"calibration/batch_distribution_entropy": 0.982109064948715,
"calibration/buffer_distribution_entropy": 0.9992196819951579,
"calibration/confidence_entropy": 0.5100215597734005,
"calibration/coverage@0%": 0.007042725498271812,
"calibration/coverage@1%": 0.007042725498271812,
"calibration/coverage@10%": 0.07397031845326203,
"calibration/coverage@15%": 0.1361997093534577,
"calibration/coverage@20%": 0.20663910514602132,
"calibration/coverage@25%": 0.2990031890501309,
"calibration/coverage@30%": 0.3921424074943579,
"calibration/coverage@5%": 0.01487051414797827,
"calibration/ece": 0.1576403585684839,
"calibration/mean_confidence": 0.5154867555187955,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.001171875,
"completions/max_length": 849.0,
"completions/max_terminated_length": 849.0,
"completions/mean_length": 229.2068359375,
"completions/mean_terminated_length": 229.48021240234374,
"completions/min_length": 19.4,
"completions/min_terminated_length": 92.6,
"epoch": 0.912,
"grad_norm": 0.0009245709516108036,
"learning_rate": 1e-06,
"loss": -0.0008,
"num_tokens": 956878394.0,
"reward": 0.9328309416770935,
"reward_std": 0.08833001106977463,
"rewards/accuracy_reward": 0.55703125,
"rewards/brier_reward": 0.7842136144638061,
"rewards/confidence_uniqueness_reward": 0.9519566416740417,
"rewards/format_reward": 0.9986328125,
"rewards/frontier_aurc_reward": -0.0029576101573184133,
"rewards/frontier_ece_reward": 0.0033359962981194258,
"rewards/frontier_entropy_batch_reward": -0.19582011103630065,
"rewards/volume_coverage_0": 2.129067899758752e-09,
"rewards/volume_coverage_1": 2.129067899758752e-09,
"rewards/volume_coverage_10": 3.5658557218098963e-09,
"rewards/volume_coverage_15": 1.1135661338812498e-06,
"rewards/volume_coverage_20": 0.0007912997010862455,
"rewards/volume_coverage_25": 0.005880184099078178,
"rewards/volume_coverage_5": 2.129067899758752e-09,
"signal/accuracy_reward/centered_abs_mean": 0.0874755859375,
"signal/accuracy_reward/group_std_mean": 0.1201816201210022,
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04373779296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04373779296875,
"signal/advantage_abs_mean": 0.06516797170042991,
"signal/advantage_pre_scale_abs_mean": 0.06516797170042991,
"signal/advantage_pre_scale_std": 0.11177586168050765,
"signal/advantage_std": 0.11177586168050765,
"signal/brier_reward/centered_abs_mean": 0.12937138825654984,
"signal/brier_reward/group_std_mean": 0.1675568252801895,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012937139347195625,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012937139347195625,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01360544990748167,
"signal/confidence_uniqueness_reward/group_std_mean": 0.020998037606477737,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013605450280010701,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013605450280010701,
"signal/format_reward/centered_abs_mean": 0.00263671875,
"signal/format_reward/group_std_mean": 0.007397671788930893,
"signal/format_reward/group_zero_std_frac": 0.959375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001318359375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001318359375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003030157322064042,
"signal/frontier_aurc_reward/group_std_mean": 0.005202419683337211,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.787696623476222e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.787696623476222e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.007031449675559997,
"signal/frontier_ece_reward/group_std_mean": 0.009188699722290038,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007031450048089027,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007031450048089027,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2668339848518372,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34241083860397337,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026683398336172105,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026683398336172105,
"signal/volume_coverage_0/centered_abs_mean": 4.3604963195775784e-09,
"signal/volume_coverage_0/group_std_mean": 5.642320566323633e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.784375,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.36049640839542e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 4.36049640839542e-10,
"signal/volume_coverage_1/centered_abs_mean": 4.3604963195775784e-09,
"signal/volume_coverage_1/group_std_mean": 5.642320566323633e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.784375,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.36049640839542e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 4.36049640839542e-10,
"signal/volume_coverage_10/centered_abs_mean": 7.591896622471949e-09,
"signal/volume_coverage_10/group_std_mean": 9.819433088864572e-09,
"signal/volume_coverage_10/group_zero_std_frac": 0.73125,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 7.591896444836266e-10,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 7.591896444836266e-10,
"signal/volume_coverage_15/centered_abs_mean": 2.0973064010831876e-06,
"signal/volume_coverage_15/group_std_mean": 2.675126233953051e-06,
"signal/volume_coverage_15/group_zero_std_frac": 0.05,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.0973064067675297e-07,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.0973064067675297e-07,
"signal/volume_coverage_20/centered_abs_mean": 0.001663878746330738,
"signal/volume_coverage_20/group_std_mean": 0.002154796291142702,
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00016638787637930365,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.00016638787637930365,
"signal/volume_coverage_25/centered_abs_mean": 0.009200803562998771,
"signal/volume_coverage_25/group_std_mean": 0.011962664313614368,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.000920080381911248,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.000920080381911248,
"signal/volume_coverage_5/centered_abs_mean": 4.3604963195775784e-09,
"signal/volume_coverage_5/group_std_mean": 5.642320566323633e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.784375,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.36049640839542e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 4.36049640839542e-10,
"step": 285
},
{
"calibration/aurc": 0.4040774793635074,
"calibration/batch_distribution_entropy": 0.9859097293318687,
"calibration/buffer_distribution_entropy": 0.9991986464845336,
"calibration/confidence_entropy": 0.5072232364333589,
"calibration/coverage@0%": 0.00546875,
"calibration/coverage@1%": 0.00546875,
"calibration/coverage@10%": 0.00546875,
"calibration/coverage@15%": 0.005859375,
"calibration/coverage@20%": 0.016015625,
"calibration/coverage@25%": 0.036328125,
"calibration/coverage@30%": 0.231640625,
"calibration/coverage@5%": 0.00546875,
"calibration/ece": 0.1370660257229794,
"calibration/mean_confidence": 0.5252956247397285,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 644.0,
"completions/max_terminated_length": 644.0,
"completions/mean_length": 225.1185546875,
"completions/mean_terminated_length": 225.20532836914063,
"completions/min_length": 66.0,
"completions/min_terminated_length": 101.8,
"epoch": 0.928,
"grad_norm": 0.0007230278570204973,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 974210424.0,
"reward": 0.9181464910507202,
"reward_std": 0.08190946877002717,
"rewards/accuracy_reward": 0.52734375,
"rewards/brier_reward": 0.7798070788383484,
"rewards/confidence_uniqueness_reward": 0.9533717632293701,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0034818340092897413,
"rewards/frontier_ece_reward": 0.003747813869267702,
"rewards/frontier_entropy_batch_reward": -0.19755975306034088,
"rewards/volume_coverage_0": 2.919360059250309e-09,
"rewards/volume_coverage_1": 2.919360059250309e-09,
"rewards/volume_coverage_10": 3.7500787586353114e-09,
"rewards/volume_coverage_15": 1.1286248764008633e-06,
"rewards/volume_coverage_20": 0.000972931594151305,
"rewards/volume_coverage_25": 0.0067935499362647535,
"rewards/volume_coverage_5": 2.919360059250309e-09,
"signal/accuracy_reward/centered_abs_mean": 0.081298828125,
"signal/accuracy_reward/group_std_mean": 0.10915876477956772,
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0406494140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0406494140625,
"signal/advantage_abs_mean": 0.06298240423202514,
"signal/advantage_pre_scale_abs_mean": 0.06298240423202514,
"signal/advantage_pre_scale_std": 0.10488016307353973,
"signal/advantage_std": 0.10488016307353973,
"signal/brier_reward/centered_abs_mean": 0.12703848332166673,
"signal/brier_reward/group_std_mean": 0.16295638978481292,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012703849002718925,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012703849002718925,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011908646300435066,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015822481364011765,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011908646440133453,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011908646440133453,
"signal/format_reward/centered_abs_mean": 0.00074462890625,
"signal/format_reward/group_std_mean": 0.0018734002020210027,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0034886215813457965,
"signal/frontier_aurc_reward/group_std_mean": 0.005800313968211413,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.360777020337991e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.360777020337991e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.0073123440146446225,
"signal/frontier_ece_reward/group_std_mean": 0.009510249830782413,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007312344270758331,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007312344270758331,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26927927136421204,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3435124158859253,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026927927508950233,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026927927508950233,
"signal/volume_coverage_0/centered_abs_mean": 4.7993638396803816e-09,
"signal/volume_coverage_0/group_std_mean": 6.0747774810465674e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.79375,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.799363739760309e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 4.799363739760309e-10,
"signal/volume_coverage_1/centered_abs_mean": 4.7993638396803816e-09,
"signal/volume_coverage_1/group_std_mean": 6.0747774810465674e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.79375,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.799363739760309e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 4.799363739760309e-10,
"signal/volume_coverage_10/centered_abs_mean": 2.0036284098523537e-08,
"signal/volume_coverage_10/group_std_mean": 2.5118979429805678e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.703125,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.0036283521207566e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.0036283521207566e-09,
"signal/volume_coverage_15/centered_abs_mean": 2.117918847943656e-06,
"signal/volume_coverage_15/group_std_mean": 2.70749264927872e-06,
"signal/volume_coverage_15/group_zero_std_frac": 0.01875,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.1179188820497074e-07,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.1179188820497074e-07,
"signal/volume_coverage_20/centered_abs_mean": 0.001639655651524663,
"signal/volume_coverage_20/group_std_mean": 0.0021060988772660496,
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00016396556748077273,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.00016396556748077273,
"signal/volume_coverage_25/centered_abs_mean": 0.009390851110219955,
"signal/volume_coverage_25/group_std_mean": 0.012028184160590172,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0009390851017087698,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0009390851017087698,
"signal/volume_coverage_5/centered_abs_mean": 4.7993638396803816e-09,
"signal/volume_coverage_5/group_std_mean": 6.0747774810465674e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.79375,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.799363739760309e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 4.799363739760309e-10,
"step": 290
},
{
"calibration/aurc": 0.2707380283968821,
"calibration/batch_distribution_entropy": 0.9904177652775248,
"calibration/buffer_distribution_entropy": 0.9992103669769856,
"calibration/confidence_entropy": 0.5174462415283381,
"calibration/coverage@0%": 0.035546875,
"calibration/coverage@1%": 0.055078125,
"calibration/coverage@10%": 0.1703125,
"calibration/coverage@15%": 0.276171875,
"calibration/coverage@20%": 0.37265625,
"calibration/coverage@25%": 0.453515625,
"calibration/coverage@30%": 0.575390625,
"calibration/coverage@5%": 0.10078125,
"calibration/ece": 0.08772153547236908,
"calibration/mean_confidence": 0.4955904576824491,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 545.2,
"completions/max_terminated_length": 545.2,
"completions/mean_length": 227.26142578125,
"completions/mean_terminated_length": 227.3970184326172,
"completions/min_length": 37.4,
"completions/min_terminated_length": 99.4,
"epoch": 0.944,
"grad_norm": 0.0008903025300242007,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 991513005.0,
"reward": 0.9210011959075928,
"reward_std": 0.09348525255918502,
"rewards/accuracy_reward": 0.53515625,
"rewards/brier_reward": 0.7760526895523071,
"rewards/confidence_uniqueness_reward": 0.9528594970703125,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.003286689938977361,
"rewards/frontier_ece_reward": 0.0035630079917609693,
"rewards/frontier_entropy_batch_reward": -0.2010483294725418,
"rewards/volume_coverage_0": 9.560014141429817e-10,
"rewards/volume_coverage_1": 9.560014141429817e-10,
"rewards/volume_coverage_10": 1.3513594021574705e-09,
"rewards/volume_coverage_15": 1.10043815766403e-06,
"rewards/volume_coverage_20": 0.0008068614755757153,
"rewards/volume_coverage_25": 0.005824728962033987,
"rewards/volume_coverage_5": 9.560014141429817e-10,
"signal/accuracy_reward/centered_abs_mean": 0.10322265625,
"signal/accuracy_reward/group_std_mean": 0.1366787388920784,
"signal/accuracy_reward/group_zero_std_frac": 0.60625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051611328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.051611328125,
"signal/advantage_abs_mean": 0.07175759673118591,
"signal/advantage_pre_scale_abs_mean": 0.07175759673118591,
"signal/advantage_pre_scale_std": 0.11863639056682587,
"signal/advantage_std": 0.11863639056682587,
"signal/brier_reward/centered_abs_mean": 0.12774860262870788,
"signal/brier_reward/group_std_mean": 0.16281991302967072,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012774860113859176,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012774860113859176,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012914423458278179,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01821254752576351,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012914423597976566,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012914423597976566,
"signal/format_reward/centered_abs_mean": 0.001324462890625,
"signal/format_reward/group_std_mean": 0.0038669900968670845,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0031198048498481514,
"signal/frontier_aurc_reward/group_std_mean": 0.005187831167131662,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.899756156897638e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.899756156897638e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.007306762877851725,
"signal/frontier_ece_reward/group_std_mean": 0.009607454948127269,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007306763087399304,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007306763087399304,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2817619800567627,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3556141793727875,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028176198527216912,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028176198527216912,
"signal/volume_coverage_0/centered_abs_mean": 4.504470840771546e-09,
"signal/volume_coverage_0/group_std_mean": 5.734404773249935e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.796875,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.5044708518737763e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 4.5044708518737763e-10,
"signal/volume_coverage_1/centered_abs_mean": 4.504470840771546e-09,
"signal/volume_coverage_1/group_std_mean": 5.734404773249935e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.796875,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.5044708518737763e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 4.5044708518737763e-10,
"signal/volume_coverage_10/centered_abs_mean": 8.61983622257867e-09,
"signal/volume_coverage_10/group_std_mean": 1.0975744579866387e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.765625,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 8.619836289192051e-10,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 8.619836289192051e-10,
"signal/volume_coverage_15/centered_abs_mean": 2.301765857737337e-06,
"signal/volume_coverage_15/group_std_mean": 2.931733843070106e-06,
"signal/volume_coverage_15/group_zero_std_frac": 0.203125,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.3017659884772e-07,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.3017659884772e-07,
"signal/volume_coverage_20/centered_abs_mean": 0.001643260568380356,
"signal/volume_coverage_20/group_std_mean": 0.0021156548289582135,
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0001643260649871081,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.0001643260649871081,
"signal/volume_coverage_25/centered_abs_mean": 0.009385128132998943,
"signal/volume_coverage_25/group_std_mean": 0.01200024802237749,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0009385128272697329,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0009385128272697329,
"signal/volume_coverage_5/centered_abs_mean": 4.504470840771546e-09,
"signal/volume_coverage_5/group_std_mean": 5.734404773249935e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.796875,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.5044708518737763e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 4.5044708518737763e-10,
"step": 295
},
{
"calibration/aurc": 0.3539351773524442,
"calibration/batch_distribution_entropy": 0.9919915695023717,
"calibration/buffer_distribution_entropy": 0.9992684340977069,
"calibration/confidence_entropy": 0.5006099573341842,
"calibration/coverage@0%": 0.00703125,
"calibration/coverage@1%": 0.00703125,
"calibration/coverage@10%": 0.0296875,
"calibration/coverage@15%": 0.111328125,
"calibration/coverage@20%": 0.241796875,
"calibration/coverage@25%": 0.295703125,
"calibration/coverage@30%": 0.346484375,
"calibration/coverage@5%": 0.012890625,
"calibration/ece": 0.13862321886817325,
"calibration/mean_confidence": 0.5191145025524813,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 659.0,
"completions/max_terminated_length": 659.0,
"completions/mean_length": 226.6712890625,
"completions/mean_terminated_length": 226.7602111816406,
"completions/min_length": 21.4,
"completions/min_terminated_length": 102.2,
"epoch": 0.96,
"grad_norm": 0.0007685109740123153,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 1008774439.0,
"reward": 0.9186918616294861,
"reward_std": 0.07693372666835785,
"rewards/accuracy_reward": 0.52412109375,
"rewards/brier_reward": 0.7954145908355713,
"rewards/confidence_uniqueness_reward": 0.9527331829071045,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0033720153383910655,
"rewards/frontier_ece_reward": 0.004499087203294038,
"rewards/frontier_entropy_batch_reward": -0.19327735304832458,
"rewards/volume_coverage_0": 1.9510296711544584e-09,
"rewards/volume_coverage_1": 1.9510296711544584e-09,
"rewards/volume_coverage_10": 1.2793440679059741e-08,
"rewards/volume_coverage_15": 1.9328432472320856e-06,
"rewards/volume_coverage_20": 0.0012584096053615212,
"rewards/volume_coverage_25": 0.008058086410164833,
"rewards/volume_coverage_5": 1.9510296711544584e-09,
"signal/accuracy_reward/centered_abs_mean": 0.071441650390625,
"signal/accuracy_reward/group_std_mean": 0.09916180819272995,
"signal/accuracy_reward/group_zero_std_frac": 0.7,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0357208251953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0357208251953125,
"signal/advantage_abs_mean": 0.05721310302615166,
"signal/advantage_pre_scale_abs_mean": 0.05721310302615166,
"signal/advantage_pre_scale_std": 0.10052263587713242,
"signal/advantage_std": 0.10052263587713242,
"signal/brier_reward/centered_abs_mean": 0.11715176105499267,
"signal/brier_reward/group_std_mean": 0.152744123339653,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011715176329016686,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011715176329016686,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012735397927463055,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017090072110295295,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012735398719087242,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012735398719087242,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0031671120319515466,
"signal/frontier_aurc_reward/group_std_mean": 0.00535587165504694,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.958890083595179e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.958890083595179e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.007282440643757581,
"signal/frontier_ece_reward/group_std_mean": 0.009441747888922691,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007282441016286612,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007282441016286612,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2648331612348557,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33835762143135073,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026483316719532014,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026483316719532014,
"signal/volume_coverage_0/centered_abs_mean": 3.5475106408000555e-09,
"signal/volume_coverage_0/group_std_mean": 4.6579573975691345e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.840625,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 3.547510674106746e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 3.547510674106746e-10,
"signal/volume_coverage_1/centered_abs_mean": 3.5475106408000555e-09,
"signal/volume_coverage_1/group_std_mean": 4.6579573975691345e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.840625,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 3.547510674106746e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 3.547510674106746e-10,
"signal/volume_coverage_10/centered_abs_mean": 2.4543501098150956e-08,
"signal/volume_coverage_10/group_std_mean": 3.1358744401188686e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.721875,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.4543501453422324e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.4543501453422324e-09,
"signal/volume_coverage_15/centered_abs_mean": 2.320456633242429e-06,
"signal/volume_coverage_15/group_std_mean": 2.9677721840926096e-06,
"signal/volume_coverage_15/group_zero_std_frac": 0.1,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.3204566730328223e-07,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.3204566730328223e-07,
"signal/volume_coverage_20/centered_abs_mean": 0.001473587960936129,
"signal/volume_coverage_20/group_std_mean": 0.0019304967951029538,
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00014735879667568952,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.00014735879667568952,
"signal/volume_coverage_25/centered_abs_mean": 0.008963228948414326,
"signal/volume_coverage_25/group_std_mean": 0.011512291803956031,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0008963228901848197,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0008963228901848197,
"signal/volume_coverage_5/centered_abs_mean": 3.5475106408000555e-09,
"signal/volume_coverage_5/group_std_mean": 4.6579573975691345e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.840625,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 3.547510674106746e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 3.547510674106746e-10,
"step": 300
},
{
"epoch": 0.96,
"eval_calibration/aurc": 0.469143428109443,
"eval_calibration/batch_distribution_entropy": 0.9061692855550176,
"eval_calibration/buffer_distribution_entropy": 0.9992889529921487,
"eval_calibration/confidence_entropy": 0.5129001107673137,
"eval_calibration/coverage@0%": 0.0625,
"eval_calibration/coverage@1%": 0.0625,
"eval_calibration/coverage@10%": 0.0625,
"eval_calibration/coverage@15%": 0.0703125,
"eval_calibration/coverage@20%": 0.0859375,
"eval_calibration/coverage@25%": 0.1015625,
"eval_calibration/coverage@30%": 0.2109375,
"eval_calibration/coverage@5%": 0.0625,
"eval_calibration/ece": 0.19129152092641719,
"eval_calibration/mean_confidence": 0.47514103987281153,
"eval_completions/clipped_ratio": 0.002155172413793094,
"eval_completions/max_length": 440.5,
"eval_completions/max_terminated_length": 440.5,
"eval_completions/mean_length": 227.2459602355957,
"eval_completions/mean_terminated_length": 227.75753784179688,
"eval_completions/min_length": 82.5,
"eval_completions/min_terminated_length": 118.0,
"eval_loss": 0.0,
"eval_num_tokens": 1008774439.0,
"eval_reward": 0.7893799394369125,
"eval_reward_std": 0.2509300038218498,
"eval_rewards/accuracy_reward": 0.443359375,
"eval_rewards/brier_reward": 0.7772131115198135,
"eval_rewards/confidence_uniqueness_reward": 0.8947123885154724,
"eval_rewards/format_reward": 0.998046875,
"eval_rewards/frontier_aurc_reward": -0.004324580018874258,
"eval_rewards/frontier_ece_reward": 0.004353664699010551,
"eval_rewards/frontier_entropy_batch_reward": -0.998046875,
"eval_rewards/volume_coverage_0": 4.074710624468736e-09,
"eval_rewards/volume_coverage_1": 4.074710624468736e-09,
"eval_rewards/volume_coverage_10": 7.91095577934442e-09,
"eval_rewards/volume_coverage_15": 9.340307514094093e-07,
"eval_rewards/volume_coverage_20": 0.0014538196846842766,
"eval_rewards/volume_coverage_25": 0.007621690630912781,
"eval_rewards/volume_coverage_5": 4.074710624468736e-09,
"eval_runtime": 30.3472,
"eval_samples_per_second": 16.476,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4776611328125,
"eval_signal/accuracy_reward/group_std_mean": 0.49625900387763977,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23883056640625,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23883056640625,
"eval_signal/advantage_abs_mean": 0.23880807682871819,
"eval_signal/advantage_pre_scale_abs_mean": 0.23880807682871819,
"eval_signal/advantage_pre_scale_std": 0.2480723336338997,
"eval_signal/advantage_std": 0.2480723336338997,
"eval_signal/brier_reward/centered_abs_mean": 0.1952587403357029,
"eval_signal/brier_reward/group_std_mean": 0.2449491173028946,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019525874871760607,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019525874871760607,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04094819072633982,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05267652776092291,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004094819072633982,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004094819072633982,
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005890812375582755,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.012180331395938993,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.363515578617807e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.363515578617807e-05,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.009545863838866353,
"eval_signal/frontier_ece_reward/group_std_mean": 0.012291400227695704,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009545864013489336,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009545864013489336,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0037841796875,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.011048543266952038,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9375,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0003784179862122983,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0003784179862122983,
"eval_signal/volume_coverage_0/centered_abs_mean": 1.0248968562720506e-08,
"eval_signal/volume_coverage_0/group_std_mean": 1.2958092820980482e-08,
"eval_signal/volume_coverage_0/group_zero_std_frac": 0.5625,
"eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.0248968618231658e-09,
"eval_signal/volume_coverage_0/weight": 0.10000000149011612,
"eval_signal/volume_coverage_0/weighted_centered_abs_mean": 1.0248968618231658e-09,
"eval_signal/volume_coverage_1/centered_abs_mean": 1.0248968562720506e-08,
"eval_signal/volume_coverage_1/group_std_mean": 1.2958092820980482e-08,
"eval_signal/volume_coverage_1/group_zero_std_frac": 0.5625,
"eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.0248968618231658e-09,
"eval_signal/volume_coverage_1/weight": 0.10000000149011612,
"eval_signal/volume_coverage_1/weighted_centered_abs_mean": 1.0248968618231658e-09,
"eval_signal/volume_coverage_10/centered_abs_mean": 2.0943740519641096e-08,
"eval_signal/volume_coverage_10/group_std_mean": 2.6666832741994995e-08,
"eval_signal/volume_coverage_10/group_zero_std_frac": 0.5625,
"eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.0943741740886423e-09,
"eval_signal/volume_coverage_10/weight": 0.10000000149011612,
"eval_signal/volume_coverage_10/weighted_centered_abs_mean": 2.0943741740886423e-09,
"eval_signal/volume_coverage_15/centered_abs_mean": 2.8810036951654183e-06,
"eval_signal/volume_coverage_15/group_std_mean": 3.6288600995249e-06,
"eval_signal/volume_coverage_15/group_zero_std_frac": 0.25,
"eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.881003808852256e-07,
"eval_signal/volume_coverage_15/weight": 0.10000000149011612,
"eval_signal/volume_coverage_15/weighted_centered_abs_mean": 2.881003808852256e-07,
"eval_signal/volume_coverage_20/centered_abs_mean": 0.003215643868315965,
"eval_signal/volume_coverage_20/group_std_mean": 0.004141232697293162,
"eval_signal/volume_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00032156440283870324,
"eval_signal/volume_coverage_20/weight": 0.10000000149011612,
"eval_signal/volume_coverage_20/weighted_centered_abs_mean": 0.00032156440283870324,
"eval_signal/volume_coverage_25/centered_abs_mean": 0.01439478388056159,
"eval_signal/volume_coverage_25/group_std_mean": 0.018871094100177288,
"eval_signal/volume_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0014394783938769251,
"eval_signal/volume_coverage_25/weight": 0.10000000149011612,
"eval_signal/volume_coverage_25/weighted_centered_abs_mean": 0.0014394783938769251,
"eval_signal/volume_coverage_5/centered_abs_mean": 1.0248968562720506e-08,
"eval_signal/volume_coverage_5/group_std_mean": 1.2958092820980482e-08,
"eval_signal/volume_coverage_5/group_zero_std_frac": 0.5625,
"eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.0248968618231658e-09,
"eval_signal/volume_coverage_5/weight": 0.10000000149011612,
"eval_signal/volume_coverage_5/weighted_centered_abs_mean": 1.0248968618231658e-09,
"eval_steps_per_second": 0.132,
"step": 300
},
{
"calibration/aurc": 0.27828423584262174,
"calibration/batch_distribution_entropy": 0.9783725227243985,
"calibration/buffer_distribution_entropy": 0.9993470822495265,
"calibration/confidence_entropy": 0.5214416379067564,
"calibration/coverage@0%": 0.014858274217221135,
"calibration/coverage@1%": 0.014858274217221135,
"calibration/coverage@10%": 0.18521740459882582,
"calibration/coverage@15%": 0.32198737157534246,
"calibration/coverage@20%": 0.4349055161448141,
"calibration/coverage@25%": 0.49781525195694715,
"calibration/coverage@30%": 0.5665874204990216,
"calibration/coverage@5%": 0.0633011252446184,
"calibration/ece": 0.13189558718940492,
"calibration/mean_confidence": 0.5073180815615211,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00107421875,
"completions/max_length": 697.6,
"completions/max_terminated_length": 697.6,
"completions/mean_length": 227.09482421875,
"completions/mean_terminated_length": 227.33677673339844,
"completions/min_length": 18.8,
"completions/min_terminated_length": 99.0,
"epoch": 0.976,
"grad_norm": 0.001216869568452239,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 1025961010.0,
"reward": 0.9318997025489807,
"reward_std": 0.08647259920835496,
"rewards/accuracy_reward": 0.5517578125,
"rewards/brier_reward": 0.7864073157310486,
"rewards/confidence_uniqueness_reward": 0.952651071548462,
"rewards/format_reward": 0.9986328125,
"rewards/frontier_aurc_reward": -0.0029323163442313673,
"rewards/frontier_ece_reward": 0.0035886369296349585,
"rewards/frontier_entropy_batch_reward": -0.18277476131916046,
"rewards/volume_coverage_0": 9.206329920630196e-10,
"rewards/volume_coverage_1": 9.206329920630196e-10,
"rewards/volume_coverage_10": 4.901805539248017e-09,
"rewards/volume_coverage_15": 1.2830848845624132e-06,
"rewards/volume_coverage_20": 0.0008759050746448338,
"rewards/volume_coverage_25": 0.00666112988255918,
"rewards/volume_coverage_5": 9.206329920630196e-10,
"signal/accuracy_reward/centered_abs_mean": 0.09208984375,
"signal/accuracy_reward/group_std_mean": 0.1234636165201664,
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.046044921875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.046044921875,
"signal/advantage_abs_mean": 0.06493477523326874,
"signal/advantage_pre_scale_abs_mean": 0.06493477523326874,
"signal/advantage_pre_scale_std": 0.11201283037662506,
"signal/advantage_std": 0.11201283037662506,
"signal/brier_reward/centered_abs_mean": 0.12516404688358307,
"signal/brier_reward/group_std_mean": 0.162343767285347,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012516404874622822,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012516404874622822,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013118837960064411,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01964471973478794,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013118838891386987,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013118838891386987,
"signal/format_reward/centered_abs_mean": 0.00257568359375,
"signal/format_reward/group_std_mean": 0.006574305240064859,
"signal/format_reward/group_zero_std_frac": 0.965625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001287841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001287841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027899232693016527,
"signal/frontier_aurc_reward/group_std_mean": 0.0048869956284761425,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4874041375587694e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4874041375587694e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.0070959897711873055,
"signal/frontier_ece_reward/group_std_mean": 0.009288905560970307,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007095989771187305,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007095989771187305,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24910698533058168,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32469738125801084,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02491069883108139,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02491069883108139,
"signal/volume_coverage_0/centered_abs_mean": 5.720340201520457e-09,
"signal/volume_coverage_0/group_std_mean": 7.311743388527248e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.79375,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.72034031254276e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 5.72034031254276e-10,
"signal/volume_coverage_1/centered_abs_mean": 5.720340201520457e-09,
"signal/volume_coverage_1/group_std_mean": 7.311743388527248e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.79375,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.72034031254276e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 5.72034031254276e-10,
"signal/volume_coverage_10/centered_abs_mean": 1.532603484122319e-08,
"signal/volume_coverage_10/group_std_mean": 1.9649526628029434e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.64375,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.5326034663587507e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.5326034663587507e-09,
"signal/volume_coverage_15/centered_abs_mean": 1.4135470792098203e-06,
"signal/volume_coverage_15/group_std_mean": 1.8063871948470479e-06,
"signal/volume_coverage_15/group_zero_std_frac": 0.059375,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.4135470536302818e-07,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.4135470536302818e-07,
"signal/volume_coverage_20/centered_abs_mean": 0.001932887057773769,
"signal/volume_coverage_20/group_std_mean": 0.0025168229360133408,
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00019328870403114706,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.00019328870403114706,
"signal/volume_coverage_25/centered_abs_mean": 0.009515535458922387,
"signal/volume_coverage_25/group_std_mean": 0.012358383275568485,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0009515536017715931,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0009515536017715931,
"signal/volume_coverage_5/centered_abs_mean": 5.720340201520457e-09,
"signal/volume_coverage_5/group_std_mean": 7.311743388527248e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.79375,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 5.72034031254276e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 5.72034031254276e-10,
"step": 305
},
{
"calibration/aurc": 0.3711929587850343,
"calibration/batch_distribution_entropy": 0.9894443021341646,
"calibration/buffer_distribution_entropy": 0.9993375714297741,
"calibration/confidence_entropy": 0.510071766819145,
"calibration/coverage@0%": 0.01015625,
"calibration/coverage@1%": 0.01015625,
"calibration/coverage@10%": 0.059375,
"calibration/coverage@15%": 0.087109375,
"calibration/coverage@20%": 0.11171875,
"calibration/coverage@25%": 0.161328125,
"calibration/coverage@30%": 0.323828125,
"calibration/coverage@5%": 0.0140625,
"calibration/ece": 0.14348603278692526,
"calibration/mean_confidence": 0.4779751045544707,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 504.2,
"completions/max_terminated_length": 504.2,
"completions/mean_length": 219.91044921875,
"completions/mean_terminated_length": 220.01998596191407,
"completions/min_length": 58.8,
"completions/min_terminated_length": 98.2,
"epoch": 0.992,
"grad_norm": 0.0009316056966781616,
"learning_rate": 1e-06,
"loss": -0.0005,
"num_tokens": 1043341373.0,
"reward": 0.92005295753479,
"reward_std": 0.08265489488840103,
"rewards/accuracy_reward": 0.53046875,
"rewards/brier_reward": 0.7892962336540222,
"rewards/confidence_uniqueness_reward": 0.9527257204055786,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0031668921466916798,
"rewards/frontier_ece_reward": 0.0035965410992503167,
"rewards/frontier_entropy_batch_reward": -0.20363328158855437,
"rewards/volume_coverage_0": 2.720843217396407e-09,
"rewards/volume_coverage_1": 2.720843217396407e-09,
"rewards/volume_coverage_10": 1.035092784817948e-08,
"rewards/volume_coverage_15": 1.4946397698167856e-06,
"rewards/volume_coverage_20": 0.001336311805061996,
"rewards/volume_coverage_25": 0.007699974346905947,
"rewards/volume_coverage_5": 2.720843217396407e-09,
"signal/accuracy_reward/centered_abs_mean": 0.08271484375,
"signal/accuracy_reward/group_std_mean": 0.10999367833137512,
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.041357421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.041357421875,
"signal/advantage_abs_mean": 0.06338016986846924,
"signal/advantage_pre_scale_abs_mean": 0.06338016986846924,
"signal/advantage_pre_scale_std": 0.10698268860578537,
"signal/advantage_std": 0.10698268860578537,
"signal/brier_reward/centered_abs_mean": 0.12253952324390412,
"signal/brier_reward/group_std_mean": 0.15773009061813353,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012253952585160733,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012253952585160733,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01234852969646454,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016739832423627376,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001234852964989841,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001234852964989841,
"signal/format_reward/centered_abs_mean": 0.000933837890625,
"signal/format_reward/group_std_mean": 0.0024258273653686045,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002989071374759078,
"signal/frontier_aurc_reward/group_std_mean": 0.005077757174149156,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.736339276656508e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.736339276656508e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.006974827032536268,
"signal/frontier_ece_reward/group_std_mean": 0.009086176566779614,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006974827032536268,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006974827032536268,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27312966585159304,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34787346720695494,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02731296643614769,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02731296643614769,
"signal/volume_coverage_0/centered_abs_mean": 5.004535186259318e-09,
"signal/volume_coverage_0/group_std_mean": 6.371262450954873e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.7875,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.004535175157087e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 5.004535175157087e-10,
"signal/volume_coverage_1/centered_abs_mean": 5.004535186259318e-09,
"signal/volume_coverage_1/group_std_mean": 6.371262450954873e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.7875,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.004535175157087e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 5.004535175157087e-10,
"signal/volume_coverage_10/centered_abs_mean": 1.767177568723355e-08,
"signal/volume_coverage_10/group_std_mean": 2.240396206332207e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.50625,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.76717753763711e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.76717753763711e-09,
"signal/volume_coverage_15/centered_abs_mean": 1.7186309264616284e-06,
"signal/volume_coverage_15/group_std_mean": 2.15395040754629e-06,
"signal/volume_coverage_15/group_zero_std_frac": 0.0,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.7186309761996199e-07,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.7186309761996199e-07,
"signal/volume_coverage_20/centered_abs_mean": 0.002075748727656901,
"signal/volume_coverage_20/group_std_mean": 0.002644157502800226,
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00020757487509399654,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.00020757487509399654,
"signal/volume_coverage_25/centered_abs_mean": 0.01014525257050991,
"signal/volume_coverage_25/group_std_mean": 0.012951592169702052,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0010145252919755876,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0010145252919755876,
"signal/volume_coverage_5/centered_abs_mean": 5.004535186259318e-09,
"signal/volume_coverage_5/group_std_mean": 6.371262450954873e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.7875,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 5.004535175157087e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 5.004535175157087e-10,
"step": 310
},
{
"calibration/aurc": 0.29983717719854924,
"calibration/batch_distribution_entropy": 0.9600171414500873,
"calibration/buffer_distribution_entropy": 0.9993445118220194,
"calibration/confidence_entropy": 0.4847878699637469,
"calibration/coverage@0%": 0.0126953125,
"calibration/coverage@1%": 0.0126953125,
"calibration/coverage@10%": 0.041015625,
"calibration/coverage@15%": 0.060546875,
"calibration/coverage@20%": 0.1005859375,
"calibration/coverage@25%": 0.359375,
"calibration/coverage@30%": 0.607421875,
"calibration/coverage@5%": 0.03515625,
"calibration/ece": 0.16615515511103873,
"calibration/mean_confidence": 0.5929890637724151,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000244140625,
"completions/max_length": 505.5,
"completions/max_terminated_length": 505.5,
"completions/mean_length": 218.71721649169922,
"completions/mean_terminated_length": 218.77042388916016,
"completions/min_length": 46.5,
"completions/min_terminated_length": 87.5,
"epoch": 0.9984,
"num_tokens": 1050240623.0,
"reward": 0.9275875687599182,
"reward_std": 0.0807461366057396,
"rewards/accuracy_reward": 0.55322265625,
"rewards/brier_reward": 0.7675114274024963,
"rewards/confidence_uniqueness_reward": 0.9522386491298676,
"rewards/format_reward": 0.999755859375,
"rewards/frontier_aurc_reward": -0.003080901689827442,
"rewards/frontier_ece_reward": 0.002699983073398471,
"rewards/frontier_entropy_batch_reward": -0.21747954189777374,
"rewards/volume_coverage_0": 2.083941741393147e-09,
"rewards/volume_coverage_1": 2.083941741393147e-09,
"rewards/volume_coverage_10": 1.1432189417348582e-08,
"rewards/volume_coverage_15": -8.116828098536644e-08,
"rewards/volume_coverage_20": 0.0009809440525714308,
"rewards/volume_coverage_25": 0.005417217034846544,
"rewards/volume_coverage_5": 2.083941741393147e-09,
"signal/accuracy_reward/centered_abs_mean": 0.075653076171875,
"signal/accuracy_reward/group_std_mean": 0.10233127698302269,
"signal/accuracy_reward/group_zero_std_frac": 0.6953125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0378265380859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0378265380859375,
"signal/advantage_abs_mean": 0.06188248656690121,
"signal/advantage_pre_scale_abs_mean": 0.06188248656690121,
"signal/advantage_pre_scale_std": 0.10488305985927582,
"signal/advantage_std": 0.10488305985927582,
"signal/brier_reward/centered_abs_mean": 0.12014567106962204,
"signal/brier_reward/group_std_mean": 0.1542070060968399,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012014567852020264,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012014567852020264,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011747919954359531,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015483672730624676,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011747920652851462,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011747920652851462,
"signal/format_reward/centered_abs_mean": 0.0004730224609375,
"signal/format_reward/group_std_mean": 0.0013810679083690047,
"signal/format_reward/group_zero_std_frac": 0.9921875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00023651123046875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00023651123046875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002737033413723111,
"signal/frontier_aurc_reward/group_std_mean": 0.004487208207137883,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4212920581921935e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4212920581921935e-05,
"signal/frontier_ece_reward/centered_abs_mean": 0.006887981900945306,
"signal/frontier_ece_reward/group_std_mean": 0.008894495666027069,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006887982017360628,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006887982017360628,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2844991385936737,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3585272878408432,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028449914418160915,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028449914418160915,
"signal/volume_coverage_0/centered_abs_mean": 4.031929234926679e-09,
"signal/volume_coverage_0/group_std_mean": 5.142488657128297e-09,
"signal/volume_coverage_0/group_zero_std_frac": 0.8125,
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.031929234926679e-10,
"signal/volume_coverage_0/weight": 0.10000000149011612,
"signal/volume_coverage_0/weighted_centered_abs_mean": 4.031929234926679e-10,
"signal/volume_coverage_1/centered_abs_mean": 4.031929234926679e-09,
"signal/volume_coverage_1/group_std_mean": 5.142488657128297e-09,
"signal/volume_coverage_1/group_zero_std_frac": 0.8125,
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.031929234926679e-10,
"signal/volume_coverage_1/weight": 0.10000000149011612,
"signal/volume_coverage_1/weighted_centered_abs_mean": 4.031929234926679e-10,
"signal/volume_coverage_10/centered_abs_mean": 1.602587573756864e-08,
"signal/volume_coverage_10/group_std_mean": 2.028727230651839e-08,
"signal/volume_coverage_10/group_zero_std_frac": 0.5234375,
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.602587618165785e-09,
"signal/volume_coverage_10/weight": 0.10000000149011612,
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.602587618165785e-09,
"signal/volume_coverage_15/centered_abs_mean": 1.2426704643075936e-06,
"signal/volume_coverage_15/group_std_mean": 1.5521810041718709e-06,
"signal/volume_coverage_15/group_zero_std_frac": 0.015625,
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.2426705353618672e-07,
"signal/volume_coverage_15/weight": 0.10000000149011612,
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.2426705353618672e-07,
"signal/volume_coverage_20/centered_abs_mean": 0.0020069401944056153,
"signal/volume_coverage_20/group_std_mean": 0.0025554284220561385,
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00020069401944056153,
"signal/volume_coverage_20/weight": 0.10000000149011612,
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.00020069401944056153,
"signal/volume_coverage_25/centered_abs_mean": 0.009221313055604696,
"signal/volume_coverage_25/group_std_mean": 0.011924784164875746,
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0009221313230227679,
"signal/volume_coverage_25/weight": 0.10000000149011612,
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0009221313230227679,
"signal/volume_coverage_5/centered_abs_mean": 4.031929234926679e-09,
"signal/volume_coverage_5/group_std_mean": 5.142488657128297e-09,
"signal/volume_coverage_5/group_zero_std_frac": 0.8125,
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.031929234926679e-10,
"signal/volume_coverage_5/weight": 0.10000000149011612,
"signal/volume_coverage_5/weighted_centered_abs_mean": 4.031929234926679e-10,
"step": 312,
"total_flos": 0.0,
"train_loss": 1.2157592629172052e-05,
"train_runtime": 59916.4251,
"train_samples_per_second": 0.334,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 312,
"num_input_tokens_seen": 1050240623,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}