Files
RLCR-v4-ks-uniqueness-cov0-…/trainer_state.json
ModelHub XC ffbd0ed9c9 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-uniqueness-cov0-entropy50-cold-math
Source: Original Platform
2026-05-27 18:56:20 +08:00

7368 lines
475 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.49919376007799904,
"eval_steps": 50,
"global_step": 208,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.4786468696997992,
"calibration/batch_distribution_entropy": 0.27434989424557693,
"calibration/batch_entropy_100bins": 0.3452116907370852,
"calibration/batch_entropy_10bins": 0.27434989424557693,
"calibration/batch_entropy_50bins": 0.40370561408688826,
"calibration/batch_uniqueness": 0.4969804532848675,
"calibration/confidence_entropy": 0.215996847848038,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.45861411649511047,
"calibration/mean_confidence": 0.9140472626196257,
"calibration/prompt_uniqueness": 0.35674800174725496,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.020225694444444442,
"completions/max_length": 4034.2,
"completions/max_terminated_length": 4034.2,
"completions/mean_length": 518.5538208007813,
"completions/mean_terminated_length": 529.2614379882813,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.011999850001874977,
"grad_norm": 0.0034298275131732225,
"learning_rate": 5.952380952380953e-07,
"loss": 0.0041,
"num_tokens": 9087948.0,
"reward": 0.48353423476219176,
"reward_std": 0.4472260117530823,
"rewards/accuracy_reward": 0.25295138359069824,
"rewards/brier_reward": 0.3069717109203339,
"rewards/confidence_uniqueness_reward": 0.28508294820785524,
"rewards/format_reward": 0.5980902671813965,
"rewards/frontier_aurc_reward": 0.26909309029579165,
"rewards/frontier_coverage_0": 0.26909309029579165,
"rewards/frontier_coverage_1": 0.26909309029579165,
"rewards/frontier_coverage_10": 0.26909309029579165,
"rewards/frontier_coverage_15": 0.26909309029579165,
"rewards/frontier_coverage_20": 0.26909309029579165,
"rewards/frontier_coverage_25": 0.26909309029579165,
"rewards/frontier_coverage_5": 0.26909309029579165,
"rewards/frontier_ece_reward": 0.26909309029579165,
"rewards/frontier_entropy_batch_reward": -0.5501068949699401,
"signal/accuracy_reward/centered_abs_mean": 0.30129122734069824,
"signal/accuracy_reward/group_bin_occupancy": 0.2361111111111111,
"signal/accuracy_reward/group_std_mean": 0.3599981427192688,
"signal/accuracy_reward/group_zero_std_frac": 0.11111111268401146,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15064561367034912,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15064561367034912,
"signal/advantage_abs_mean": 0.38422595858573916,
"signal/advantage_pre_scale_abs_mean": 0.38422595858573916,
"signal/advantage_pre_scale_std": 0.4541194498538971,
"signal/advantage_std": 0.4541194498538971,
"signal/brier_reward/centered_abs_mean": 0.31531033515930174,
"signal/brier_reward/group_bin_occupancy": 0.5211805555555555,
"signal/brier_reward/group_std_mean": 0.36791505217552184,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031531032919883725,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.031531032919883725,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.23501766622066497,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6003472222222223,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2864716470241547,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023501767963171005,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023501767963171005,
"signal/format_reward/centered_abs_mean": 0.43889973759651185,
"signal/format_reward/group_bin_occupancy": 0.25,
"signal/format_reward/group_std_mean": 0.4739928424358368,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.21944986879825593,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.21944986879825593,
"signal/frontier_aurc_reward/centered_abs_mean": 0.3046343684196472,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.4024305555555555,
"signal/frontier_aurc_reward/group_std_mean": 0.36159105896949767,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003807929763570428,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003807929763570428,
"signal/frontier_coverage_0/centered_abs_mean": 0.3046343684196472,
"signal/frontier_coverage_0/group_bin_occupancy": 0.4024305555555555,
"signal/frontier_coverage_0/group_std_mean": 0.36159105896949767,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003807929763570428,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003807929763570428,
"signal/frontier_coverage_1/centered_abs_mean": 0.3046343684196472,
"signal/frontier_coverage_1/group_bin_occupancy": 0.4024305555555555,
"signal/frontier_coverage_1/group_std_mean": 0.36159105896949767,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003807929763570428,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003807929763570428,
"signal/frontier_coverage_10/centered_abs_mean": 0.3046343684196472,
"signal/frontier_coverage_10/group_bin_occupancy": 0.4024305555555555,
"signal/frontier_coverage_10/group_std_mean": 0.36159105896949767,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003807929763570428,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003807929763570428,
"signal/frontier_coverage_15/centered_abs_mean": 0.3046343684196472,
"signal/frontier_coverage_15/group_bin_occupancy": 0.4024305555555555,
"signal/frontier_coverage_15/group_std_mean": 0.36159105896949767,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003807929763570428,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003807929763570428,
"signal/frontier_coverage_20/centered_abs_mean": 0.3046343684196472,
"signal/frontier_coverage_20/group_bin_occupancy": 0.4024305555555555,
"signal/frontier_coverage_20/group_std_mean": 0.36159105896949767,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003807929763570428,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003807929763570428,
"signal/frontier_coverage_25/centered_abs_mean": 0.3046343684196472,
"signal/frontier_coverage_25/group_bin_occupancy": 0.4024305555555555,
"signal/frontier_coverage_25/group_std_mean": 0.36159105896949767,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003807929763570428,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003807929763570428,
"signal/frontier_coverage_5/centered_abs_mean": 0.3046343684196472,
"signal/frontier_coverage_5/group_bin_occupancy": 0.4024305555555555,
"signal/frontier_coverage_5/group_std_mean": 0.36159105896949767,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003807929763570428,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003807929763570428,
"signal/frontier_ece_reward/centered_abs_mean": 0.3046343684196472,
"signal/frontier_ece_reward/group_bin_occupancy": 0.4024305555555555,
"signal/frontier_ece_reward/group_std_mean": 0.36159105896949767,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030463438108563425,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030463438108563425,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.45770797729492185,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3309027777777778,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.49182985424995423,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0457707978785038,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0457707978785038,
"step": 5
},
{
"calibration/aurc": 0.5215323422297184,
"calibration/batch_distribution_entropy": 0.27063770046218427,
"calibration/batch_entropy_100bins": 0.3524338658024668,
"calibration/batch_entropy_10bins": 0.27063770046218427,
"calibration/batch_entropy_50bins": 0.41210669269116024,
"calibration/batch_uniqueness": 0.5186394142692434,
"calibration/confidence_entropy": 0.22697660378505544,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4744829452486872,
"calibration/mean_confidence": 0.9174019066918241,
"calibration/prompt_uniqueness": 0.4043385400555728,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017274305555555557,
"completions/max_length": 4042.4,
"completions/max_terminated_length": 4042.4,
"completions/mean_length": 478.6330810546875,
"completions/mean_terminated_length": 487.2344909667969,
"completions/min_length": 0.0,
"completions/min_terminated_length": 19.2,
"epoch": 0.023999700003749954,
"grad_norm": 0.020310023799538612,
"learning_rate": 1.1904761904761906e-06,
"loss": 0.0026,
"num_tokens": 17684521.0,
"reward": 0.5639899730682373,
"reward_std": 0.4257666528224945,
"rewards/accuracy_reward": 0.28472222983837125,
"rewards/brier_reward": 0.34975607991218566,
"rewards/confidence_uniqueness_reward": 0.35827080607414247,
"rewards/format_reward": 0.7129340171813965,
"rewards/frontier_aurc_reward": 0.30107017755508425,
"rewards/frontier_coverage_0": 0.30107017755508425,
"rewards/frontier_coverage_1": 0.30107017755508425,
"rewards/frontier_coverage_10": 0.30107017755508425,
"rewards/frontier_coverage_15": 0.30107017755508425,
"rewards/frontier_coverage_20": 0.30107017755508425,
"rewards/frontier_coverage_25": 0.30107017755508425,
"rewards/frontier_coverage_5": 0.30107017755508425,
"rewards/frontier_ece_reward": 0.30107017755508425,
"rewards/frontier_entropy_batch_reward": -0.6585487723350525,
"signal/accuracy_reward/centered_abs_mean": 0.31558159589767454,
"signal/accuracy_reward/group_bin_occupancy": 0.24027777777777776,
"signal/accuracy_reward/group_std_mean": 0.37627485394477844,
"signal/accuracy_reward/group_zero_std_frac": 0.07777777928858995,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15779079794883727,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15779079794883727,
"signal/advantage_abs_mean": 0.3536957919597626,
"signal/advantage_pre_scale_abs_mean": 0.3536957919597626,
"signal/advantage_pre_scale_std": 0.4313755929470062,
"signal/advantage_std": 0.4313755929470062,
"signal/brier_reward/centered_abs_mean": 0.31298828125,
"signal/brier_reward/group_bin_occupancy": 0.545138888888889,
"signal/brier_reward/group_std_mean": 0.36775757670402526,
"signal/brier_reward/group_zero_std_frac": 0.00555555559694767,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03129882961511612,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.03129882961511612,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.22144390940666198,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6322916666666667,
"signal/confidence_uniqueness_reward/group_std_mean": 0.27823981642723083,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022144390642642973,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022144390642642973,
"signal/format_reward/centered_abs_mean": 0.3566026449203491,
"signal/format_reward/group_bin_occupancy": 0.25,
"signal/format_reward/group_std_mean": 0.42138834595680236,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.17830132246017455,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.17830132246017455,
"signal/frontier_aurc_reward/centered_abs_mean": 0.3110755383968353,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.4229166666666667,
"signal/frontier_aurc_reward/group_std_mean": 0.3699175715446472,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.00555555559694767,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003888444369658828,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003888444369658828,
"signal/frontier_coverage_0/centered_abs_mean": 0.3110755383968353,
"signal/frontier_coverage_0/group_bin_occupancy": 0.4229166666666667,
"signal/frontier_coverage_0/group_std_mean": 0.3699175715446472,
"signal/frontier_coverage_0/group_zero_std_frac": 0.00555555559694767,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003888444369658828,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003888444369658828,
"signal/frontier_coverage_1/centered_abs_mean": 0.3110755383968353,
"signal/frontier_coverage_1/group_bin_occupancy": 0.4229166666666667,
"signal/frontier_coverage_1/group_std_mean": 0.3699175715446472,
"signal/frontier_coverage_1/group_zero_std_frac": 0.00555555559694767,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003888444369658828,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003888444369658828,
"signal/frontier_coverage_10/centered_abs_mean": 0.3110755383968353,
"signal/frontier_coverage_10/group_bin_occupancy": 0.4229166666666667,
"signal/frontier_coverage_10/group_std_mean": 0.3699175715446472,
"signal/frontier_coverage_10/group_zero_std_frac": 0.00555555559694767,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003888444369658828,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003888444369658828,
"signal/frontier_coverage_15/centered_abs_mean": 0.3110755383968353,
"signal/frontier_coverage_15/group_bin_occupancy": 0.4229166666666667,
"signal/frontier_coverage_15/group_std_mean": 0.3699175715446472,
"signal/frontier_coverage_15/group_zero_std_frac": 0.00555555559694767,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003888444369658828,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003888444369658828,
"signal/frontier_coverage_20/centered_abs_mean": 0.3110755383968353,
"signal/frontier_coverage_20/group_bin_occupancy": 0.4229166666666667,
"signal/frontier_coverage_20/group_std_mean": 0.3699175715446472,
"signal/frontier_coverage_20/group_zero_std_frac": 0.00555555559694767,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003888444369658828,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003888444369658828,
"signal/frontier_coverage_25/centered_abs_mean": 0.3110755383968353,
"signal/frontier_coverage_25/group_bin_occupancy": 0.4229166666666667,
"signal/frontier_coverage_25/group_std_mean": 0.3699175715446472,
"signal/frontier_coverage_25/group_zero_std_frac": 0.00555555559694767,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003888444369658828,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003888444369658828,
"signal/frontier_coverage_5/centered_abs_mean": 0.3110755383968353,
"signal/frontier_coverage_5/group_bin_occupancy": 0.4229166666666667,
"signal/frontier_coverage_5/group_std_mean": 0.3699175715446472,
"signal/frontier_coverage_5/group_zero_std_frac": 0.00555555559694767,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003888444369658828,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003888444369658828,
"signal/frontier_ece_reward/centered_abs_mean": 0.3110755383968353,
"signal/frontier_ece_reward/group_bin_occupancy": 0.4229166666666667,
"signal/frontier_ece_reward/group_std_mean": 0.3699175715446472,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00555555559694767,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.031107554957270623,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.031107554957270623,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4007949113845825,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.34236111111111117,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4584290623664856,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04007949084043503,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04007949084043503,
"step": 10
},
{
"calibration/aurc": 0.5634525142239435,
"calibration/batch_distribution_entropy": 0.2936840156784979,
"calibration/batch_entropy_100bins": 0.3574441819799151,
"calibration/batch_entropy_10bins": 0.2936840156784979,
"calibration/batch_entropy_50bins": 0.41566705111670643,
"calibration/batch_uniqueness": 0.5181815236994417,
"calibration/confidence_entropy": 0.22663220422136415,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5354899110079999,
"calibration/mean_confidence": 0.9149286672818105,
"calibration/prompt_uniqueness": 0.3991411197867409,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 4018.0,
"completions/max_terminated_length": 4018.0,
"completions/mean_length": 430.7875915527344,
"completions/mean_terminated_length": 435.9453552246094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 44.0,
"epoch": 0.03599955000562493,
"grad_norm": 0.0013605451676994562,
"learning_rate": 1.7857142857142859e-06,
"loss": -0.0061,
"num_tokens": 25749178.0,
"reward": 0.6798976540565491,
"reward_std": 0.34733850955963136,
"rewards/accuracy_reward": 0.3006076455116272,
"rewards/brier_reward": 0.4006872236728668,
"rewards/confidence_uniqueness_reward": 0.49280205368995667,
"rewards/format_reward": 0.9177083253860474,
"rewards/frontier_aurc_reward": 0.32862133979797364,
"rewards/frontier_coverage_0": 0.32862133979797364,
"rewards/frontier_coverage_1": 0.32862133979797364,
"rewards/frontier_coverage_10": 0.32862133979797364,
"rewards/frontier_coverage_15": 0.32862133979797364,
"rewards/frontier_coverage_20": 0.32862133979797364,
"rewards/frontier_coverage_25": 0.32862133979797364,
"rewards/frontier_coverage_5": 0.32862133979797364,
"rewards/frontier_ece_reward": 0.32862133979797364,
"rewards/frontier_entropy_batch_reward": -0.8433352708816528,
"signal/accuracy_reward/centered_abs_mean": 0.31458876729011537,
"signal/accuracy_reward/group_bin_occupancy": 0.23888888888888887,
"signal/accuracy_reward/group_std_mean": 0.3746976673603058,
"signal/accuracy_reward/group_zero_std_frac": 0.0888888917863369,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15729438364505768,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15729438364505768,
"signal/advantage_abs_mean": 0.2811680108308792,
"signal/advantage_pre_scale_abs_mean": 0.2811680108308792,
"signal/advantage_pre_scale_std": 0.3555997729301453,
"signal/advantage_std": 0.3555997729301453,
"signal/brier_reward/centered_abs_mean": 0.3005147516727448,
"signal/brier_reward/group_bin_occupancy": 0.6149305555555555,
"signal/brier_reward/group_std_mean": 0.35394822955131533,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030051474645733833,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.030051474645733833,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.18455613553524017,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6274305555555556,
"signal/confidence_uniqueness_reward/group_std_mean": 0.23438866436481476,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.018455613404512405,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.018455613404512405,
"signal/format_reward/centered_abs_mean": 0.13573133796453477,
"signal/format_reward/group_bin_occupancy": 0.22152777777777782,
"signal/format_reward/group_std_mean": 0.220550999045372,
"signal/format_reward/group_zero_std_frac": 0.22777777388691903,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06786566898226738,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.06786566898226738,
"signal/frontier_aurc_reward/centered_abs_mean": 0.30784491300582884,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.48611111111111105,
"signal/frontier_aurc_reward/group_std_mean": 0.36494665741920473,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038480616174638273,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038480616174638273,
"signal/frontier_coverage_0/centered_abs_mean": 0.30784491300582884,
"signal/frontier_coverage_0/group_bin_occupancy": 0.48611111111111105,
"signal/frontier_coverage_0/group_std_mean": 0.36494665741920473,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0038480616174638273,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0038480616174638273,
"signal/frontier_coverage_1/centered_abs_mean": 0.30784491300582884,
"signal/frontier_coverage_1/group_bin_occupancy": 0.48611111111111105,
"signal/frontier_coverage_1/group_std_mean": 0.36494665741920473,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038480616174638273,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038480616174638273,
"signal/frontier_coverage_10/centered_abs_mean": 0.30784491300582884,
"signal/frontier_coverage_10/group_bin_occupancy": 0.48611111111111105,
"signal/frontier_coverage_10/group_std_mean": 0.36494665741920473,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038480616174638273,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038480616174638273,
"signal/frontier_coverage_15/centered_abs_mean": 0.30784491300582884,
"signal/frontier_coverage_15/group_bin_occupancy": 0.48611111111111105,
"signal/frontier_coverage_15/group_std_mean": 0.36494665741920473,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038480616174638273,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038480616174638273,
"signal/frontier_coverage_20/centered_abs_mean": 0.30784491300582884,
"signal/frontier_coverage_20/group_bin_occupancy": 0.48611111111111105,
"signal/frontier_coverage_20/group_std_mean": 0.36494665741920473,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038480616174638273,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038480616174638273,
"signal/frontier_coverage_25/centered_abs_mean": 0.30784491300582884,
"signal/frontier_coverage_25/group_bin_occupancy": 0.48611111111111105,
"signal/frontier_coverage_25/group_std_mean": 0.36494665741920473,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038480616174638273,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038480616174638273,
"signal/frontier_coverage_5/centered_abs_mean": 0.30784491300582884,
"signal/frontier_coverage_5/group_bin_occupancy": 0.48611111111111105,
"signal/frontier_coverage_5/group_std_mean": 0.36494665741920473,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038480616174638273,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038480616174638273,
"signal/frontier_ece_reward/centered_abs_mean": 0.30784491300582884,
"signal/frontier_ece_reward/group_bin_occupancy": 0.48611111111111105,
"signal/frontier_ece_reward/group_std_mean": 0.36494665741920473,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03078449293971062,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03078449293971062,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24438310861587526,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.36006944444444444,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3511778712272644,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.02500000037252903,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02443831190466881,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02443831190466881,
"step": 15
},
{
"calibration/aurc": 0.49393609341404227,
"calibration/batch_distribution_entropy": 0.3950132648234722,
"calibration/batch_entropy_100bins": 0.40505684224566296,
"calibration/batch_entropy_10bins": 0.3950132648234722,
"calibration/batch_entropy_50bins": 0.4693520387698801,
"calibration/batch_uniqueness": 0.61509530787946,
"calibration/buffer_distribution_entropy": 0.3091847349375323,
"calibration/buffer_entropy_100bins": 0.37384274971807285,
"calibration/buffer_entropy_10bins": 0.3091847349375323,
"calibration/buffer_entropy_50bins": 0.4345021124322783,
"calibration/confidence_entropy": 0.2933222613376684,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.029023746701846966,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4258668636956767,
"calibration/mean_confidence": 0.8878107373812835,
"calibration/prompt_uniqueness": 0.517297995778335,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010850694444444442,
"completions/max_length": 3979.6,
"completions/max_terminated_length": 3979.6,
"completions/mean_length": 432.4179748535156,
"completions/mean_terminated_length": 437.20750732421874,
"completions/min_length": 0.0,
"completions/min_terminated_length": 68.8,
"epoch": 0.04799940000749991,
"grad_norm": 0.0028677526861429214,
"learning_rate": 2.380952380952381e-06,
"loss": -0.0086,
"num_tokens": 33844329.0,
"reward": 0.7569576263427734,
"reward_std": 0.2728476107120514,
"rewards/accuracy_reward": 0.41449652910232543,
"rewards/brier_reward": 0.5286458790302276,
"rewards/confidence_uniqueness_reward": 0.606674587726593,
"rewards/format_reward": 0.9817708253860473,
"rewards/frontier_aurc_reward": 0.17920130817219615,
"rewards/frontier_coverage_0": 0.1886154913343489,
"rewards/frontier_coverage_1": 0.1886154913343489,
"rewards/frontier_coverage_10": 0.1886154913343489,
"rewards/frontier_coverage_15": 0.1886154913343489,
"rewards/frontier_coverage_20": 0.1886154913343489,
"rewards/frontier_coverage_25": 0.1886154913343489,
"rewards/frontier_coverage_5": 0.1886154913343489,
"rewards/frontier_ece_reward": 0.1644112183363177,
"rewards/frontier_entropy_batch_reward": -0.8989308953285218,
"signal/accuracy_reward/centered_abs_mean": 0.2998209595680237,
"signal/accuracy_reward/group_bin_occupancy": 0.24131944444444448,
"signal/accuracy_reward/group_std_mean": 0.3666124284267426,
"signal/accuracy_reward/group_zero_std_frac": 0.06944444701075554,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14991047978401184,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14991047978401184,
"signal/advantage_abs_mean": 0.2194227993488312,
"signal/advantage_pre_scale_abs_mean": 0.2194227993488312,
"signal/advantage_pre_scale_std": 0.28129519820213317,
"signal/advantage_std": 0.28129519820213317,
"signal/brier_reward/centered_abs_mean": 0.26685882806777955,
"signal/brier_reward/group_bin_occupancy": 0.6607638888888889,
"signal/brier_reward/group_std_mean": 0.32316548824310304,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02668588310480118,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02668588310480118,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.15834780037403107,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6138888888888888,
"signal/confidence_uniqueness_reward/group_std_mean": 0.19463339745998381,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015834780223667622,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015834780223667622,
"signal/format_reward/centered_abs_mean": 0.03338758684694767,
"signal/format_reward/group_bin_occupancy": 0.16562499999999997,
"signal/format_reward/group_std_mean": 0.07192002534866333,
"signal/format_reward/group_zero_std_frac": 0.675000011920929,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016693793423473834,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.016693793423473834,
"signal/frontier_aurc_reward/centered_abs_mean": 0.12922168229706585,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6350694444444445,
"signal/frontier_aurc_reward/group_std_mean": 0.1566169561818242,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.001615271106857108,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.001615271106857108,
"signal/frontier_coverage_0/centered_abs_mean": 0.1444099996238947,
"signal/frontier_coverage_0/group_bin_occupancy": 0.6125,
"signal/frontier_coverage_0/group_std_mean": 0.18344281539320945,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018051250837743283,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018051250837743283,
"signal/frontier_coverage_1/centered_abs_mean": 0.1444099996238947,
"signal/frontier_coverage_1/group_bin_occupancy": 0.6125,
"signal/frontier_coverage_1/group_std_mean": 0.18344281539320945,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018051250837743283,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018051250837743283,
"signal/frontier_coverage_10/centered_abs_mean": 0.1444099996238947,
"signal/frontier_coverage_10/group_bin_occupancy": 0.6125,
"signal/frontier_coverage_10/group_std_mean": 0.18344281539320945,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018051250837743283,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018051250837743283,
"signal/frontier_coverage_15/centered_abs_mean": 0.1444099996238947,
"signal/frontier_coverage_15/group_bin_occupancy": 0.6125,
"signal/frontier_coverage_15/group_std_mean": 0.18344281539320945,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018051250837743283,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018051250837743283,
"signal/frontier_coverage_20/centered_abs_mean": 0.1444099996238947,
"signal/frontier_coverage_20/group_bin_occupancy": 0.6125,
"signal/frontier_coverage_20/group_std_mean": 0.18344281539320945,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018051250837743283,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018051250837743283,
"signal/frontier_coverage_25/centered_abs_mean": 0.1444099996238947,
"signal/frontier_coverage_25/group_bin_occupancy": 0.6125,
"signal/frontier_coverage_25/group_std_mean": 0.18344281539320945,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018051250837743283,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018051250837743283,
"signal/frontier_coverage_5/centered_abs_mean": 0.1444099996238947,
"signal/frontier_coverage_5/group_bin_occupancy": 0.6125,
"signal/frontier_coverage_5/group_std_mean": 0.18344281539320945,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018051250837743283,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018051250837743283,
"signal/frontier_ece_reward/centered_abs_mean": 0.22200666069984437,
"signal/frontier_ece_reward/group_bin_occupancy": 0.4690972222222222,
"signal/frontier_ece_reward/group_std_mean": 0.26992476880550387,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02220066711306572,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02220066711306572,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.16805205643177032,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3486111111111111,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2819783270359039,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.09166666865348816,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01680520586669445,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01680520586669445,
"step": 20
},
{
"calibration/aurc": 0.40831880456732605,
"calibration/batch_distribution_entropy": 0.6097885070554001,
"calibration/batch_entropy_100bins": 0.48593398604284915,
"calibration/batch_entropy_10bins": 0.6097885070554001,
"calibration/batch_entropy_50bins": 0.5663091534135619,
"calibration/batch_uniqueness": 0.7425439382867337,
"calibration/buffer_distribution_entropy": 0.36603622224810134,
"calibration/buffer_entropy_100bins": 0.39999389908282235,
"calibration/buffer_entropy_10bins": 0.36603622224810134,
"calibration/buffer_entropy_50bins": 0.4646358566837005,
"calibration/confidence_entropy": 0.39686215013611503,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.07571801566579635,
"calibration/coverage@30%": 0.2670907759680606,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.2710533311020483,
"calibration/mean_confidence": 0.8242252010281881,
"calibration/prompt_uniqueness": 0.6616427423448029,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00980902777777779,
"completions/max_length": 3722.6,
"completions/max_terminated_length": 3722.6,
"completions/mean_length": 459.39210205078126,
"completions/mean_terminated_length": 463.9351379394531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 102.2,
"epoch": 0.05999925000937488,
"grad_norm": 0.0009269694564864039,
"learning_rate": 2.9761904761904763e-06,
"loss": -0.0064,
"num_tokens": 42260974.0,
"reward": 0.7958746194839478,
"reward_std": 0.2274015724658966,
"rewards/accuracy_reward": 0.5085069417953492,
"rewards/brier_reward": 0.6438981890678406,
"rewards/confidence_uniqueness_reward": 0.734617817401886,
"rewards/format_reward": 0.9876736283302308,
"rewards/frontier_aurc_reward": -0.004628232168033719,
"rewards/frontier_coverage_0": 0.005646060802973807,
"rewards/frontier_coverage_1": 0.005646060802973807,
"rewards/frontier_coverage_10": 0.005646060802973807,
"rewards/frontier_coverage_15": 0.005646060802973807,
"rewards/frontier_coverage_20": 0.005646060802973807,
"rewards/frontier_coverage_25": 0.005646060802973807,
"rewards/frontier_coverage_5": 0.005646060802973807,
"rewards/frontier_ece_reward": 0.007948444318026304,
"rewards/frontier_entropy_batch_reward": -0.9129829168319702,
"signal/accuracy_reward/centered_abs_mean": 0.28665364980697633,
"signal/accuracy_reward/group_bin_occupancy": 0.23819444444444446,
"signal/accuracy_reward/group_std_mean": 0.3534803450107574,
"signal/accuracy_reward/group_zero_std_frac": 0.09444444552063942,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14332682490348816,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14332682490348816,
"signal/advantage_abs_mean": 0.18016738891601564,
"signal/advantage_pre_scale_abs_mean": 0.18016738891601564,
"signal/advantage_pre_scale_std": 0.238165420293808,
"signal/advantage_std": 0.238165420293808,
"signal/brier_reward/centered_abs_mean": 0.21931754648685456,
"signal/brier_reward/group_bin_occupancy": 0.7795138888888888,
"signal/brier_reward/group_std_mean": 0.27221688628196716,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021931754797697066,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.021931754797697066,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09854339063167572,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6871527777777777,
"signal/confidence_uniqueness_reward/group_std_mean": 0.12727494537830353,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009854339342564345,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009854339342564345,
"signal/format_reward/centered_abs_mean": 0.022319878824055193,
"signal/format_reward/group_bin_occupancy": 0.15138888888888888,
"signal/format_reward/group_std_mean": 0.047178071737289426,
"signal/format_reward/group_zero_std_frac": 0.7888888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011159939412027597,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011159939412027597,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002872100844979286,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7059027777777778,
"signal/frontier_aurc_reward/group_std_mean": 0.004377482458949089,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5901259980164466e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5901259980164466e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.0523149847984314,
"signal/frontier_coverage_0/group_bin_occupancy": 0.7753472222222222,
"signal/frontier_coverage_0/group_std_mean": 0.08000584244728089,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00065393730183132,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00065393730183132,
"signal/frontier_coverage_1/centered_abs_mean": 0.0523149847984314,
"signal/frontier_coverage_1/group_bin_occupancy": 0.7753472222222222,
"signal/frontier_coverage_1/group_std_mean": 0.08000584244728089,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00065393730183132,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00065393730183132,
"signal/frontier_coverage_10/centered_abs_mean": 0.0523149847984314,
"signal/frontier_coverage_10/group_bin_occupancy": 0.7753472222222222,
"signal/frontier_coverage_10/group_std_mean": 0.08000584244728089,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00065393730183132,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00065393730183132,
"signal/frontier_coverage_15/centered_abs_mean": 0.0523149847984314,
"signal/frontier_coverage_15/group_bin_occupancy": 0.7753472222222222,
"signal/frontier_coverage_15/group_std_mean": 0.08000584244728089,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00065393730183132,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00065393730183132,
"signal/frontier_coverage_20/centered_abs_mean": 0.0523149847984314,
"signal/frontier_coverage_20/group_bin_occupancy": 0.7753472222222222,
"signal/frontier_coverage_20/group_std_mean": 0.08000584244728089,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00065393730183132,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00065393730183132,
"signal/frontier_coverage_25/centered_abs_mean": 0.0523149847984314,
"signal/frontier_coverage_25/group_bin_occupancy": 0.7753472222222222,
"signal/frontier_coverage_25/group_std_mean": 0.08000584244728089,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00065393730183132,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00065393730183132,
"signal/frontier_coverage_5/centered_abs_mean": 0.0523149847984314,
"signal/frontier_coverage_5/group_bin_occupancy": 0.7753472222222222,
"signal/frontier_coverage_5/group_std_mean": 0.08000584244728089,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00065393730183132,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00065393730183132,
"signal/frontier_ece_reward/centered_abs_mean": 0.13553643673658372,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6496527777777777,
"signal/frontier_ece_reward/group_std_mean": 0.16666682958602905,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013553644344210625,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013553644344210625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1483454465866089,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.30590277777777775,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.26824913918972015,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1527777798473835,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0148345448076725,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0148345448076725,
"step": 25
},
{
"calibration/aurc": 0.31340018732890096,
"calibration/batch_distribution_entropy": 0.7048907327472749,
"calibration/batch_entropy_100bins": 0.5451866323540866,
"calibration/batch_entropy_10bins": 0.7048907327472749,
"calibration/batch_entropy_50bins": 0.6235172362071362,
"calibration/batch_uniqueness": 0.7777645419569349,
"calibration/buffer_distribution_entropy": 0.4720834768392196,
"calibration/buffer_entropy_100bins": 0.4512809972000536,
"calibration/buffer_entropy_10bins": 0.4720834768392196,
"calibration/buffer_entropy_50bins": 0.5236436414920623,
"calibration/confidence_entropy": 0.5383263280850088,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.060188392145496705,
"calibration/coverage@25%": 0.17894412155077963,
"calibration/coverage@30%": 0.3775832099984736,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.12798357790902398,
"calibration/mean_confidence": 0.7155363782263905,
"calibration/prompt_uniqueness": 0.7025420849805335,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01553819444444442,
"completions/max_length": 3862.6,
"completions/max_terminated_length": 3862.6,
"completions/mean_length": 536.9459350585937,
"completions/mean_terminated_length": 545.4788208007812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 122.2,
"epoch": 0.07199910001124986,
"grad_norm": 0.0006166549865156412,
"learning_rate": 3.5714285714285718e-06,
"loss": -0.0102,
"num_tokens": 51556511.0,
"reward": 0.8471167922019959,
"reward_std": 0.19835625290870668,
"rewards/accuracy_reward": 0.5782118082046509,
"rewards/brier_reward": 0.7210497856140137,
"rewards/confidence_uniqueness_reward": 0.7651053071022034,
"rewards/format_reward": 0.9834201216697693,
"rewards/frontier_aurc_reward": -0.003613748401403427,
"rewards/frontier_coverage_0": -0.00830224696546793,
"rewards/frontier_coverage_1": -0.00830224696546793,
"rewards/frontier_coverage_10": -0.00830224696546793,
"rewards/frontier_coverage_15": -0.00830224696546793,
"rewards/frontier_coverage_20": -0.00830224696546793,
"rewards/frontier_coverage_25": -0.00830224696546793,
"rewards/frontier_coverage_5": -0.00830224696546793,
"rewards/frontier_ece_reward": 0.023493098840117455,
"rewards/frontier_entropy_batch_reward": -0.838923704624176,
"signal/accuracy_reward/centered_abs_mean": 0.24400499165058137,
"signal/accuracy_reward/group_bin_occupancy": 0.22743055555555552,
"signal/accuracy_reward/group_std_mean": 0.30676281452178955,
"signal/accuracy_reward/group_zero_std_frac": 0.18055555820465088,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12200249582529069,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.12200249582529069,
"signal/advantage_abs_mean": 0.15304453670978546,
"signal/advantage_pre_scale_abs_mean": 0.15304453670978546,
"signal/advantage_pre_scale_std": 0.2149382084608078,
"signal/advantage_std": 0.2149382084608078,
"signal/brier_reward/centered_abs_mean": 0.15970109701156615,
"signal/brier_reward/group_bin_occupancy": 0.8368055555555556,
"signal/brier_reward/group_std_mean": 0.20384239852428437,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015970110520720483,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015970110520720483,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.11436907052993775,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.689236111111111,
"signal/confidence_uniqueness_reward/group_std_mean": 0.14522747993469237,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011436907574534416,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011436907574534416,
"signal/format_reward/centered_abs_mean": 0.02875976599752903,
"signal/format_reward/group_bin_occupancy": 0.15590277777777778,
"signal/format_reward/group_std_mean": 0.057394811511039735,
"signal/format_reward/group_zero_std_frac": 0.7527777791023255,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014379882998764516,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.014379882998764516,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016801425954326987,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7125,
"signal/frontier_aurc_reward/group_std_mean": 0.002739586587995291,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1001783170504496e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1001783170504496e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.0830587849020958,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8541666666666666,
"signal/frontier_coverage_0/group_std_mean": 0.11073667109012604,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001038234820589423,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001038234820589423,
"signal/frontier_coverage_1/centered_abs_mean": 0.0830587849020958,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8541666666666666,
"signal/frontier_coverage_1/group_std_mean": 0.11073667109012604,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001038234820589423,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001038234820589423,
"signal/frontier_coverage_10/centered_abs_mean": 0.0830587849020958,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8541666666666666,
"signal/frontier_coverage_10/group_std_mean": 0.11073667109012604,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001038234820589423,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001038234820589423,
"signal/frontier_coverage_15/centered_abs_mean": 0.0830587849020958,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8541666666666666,
"signal/frontier_coverage_15/group_std_mean": 0.11073667109012604,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001038234820589423,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001038234820589423,
"signal/frontier_coverage_20/centered_abs_mean": 0.0830587849020958,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8541666666666666,
"signal/frontier_coverage_20/group_std_mean": 0.11073667109012604,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001038234820589423,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001038234820589423,
"signal/frontier_coverage_25/centered_abs_mean": 0.0830587849020958,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8541666666666666,
"signal/frontier_coverage_25/group_std_mean": 0.11073667109012604,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001038234820589423,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001038234820589423,
"signal/frontier_coverage_5/centered_abs_mean": 0.0830587849020958,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8541666666666666,
"signal/frontier_coverage_5/group_std_mean": 0.11073667109012604,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001038234820589423,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001038234820589423,
"signal/frontier_ece_reward/centered_abs_mean": 0.08026944175362587,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7819444444444444,
"signal/frontier_ece_reward/group_std_mean": 0.10268646031618119,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008026944752782584,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008026944752782584,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2555039495229721,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.41215277777777787,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38815844655036924,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.03888888955116272,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025550395622849463,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025550395622849463,
"step": 30
},
{
"calibration/aurc": 0.25956437104203445,
"calibration/batch_distribution_entropy": 0.8165243972425698,
"calibration/batch_entropy_100bins": 0.8075646239662813,
"calibration/batch_entropy_10bins": 0.8165243972425698,
"calibration/batch_entropy_50bins": 0.8399069478240373,
"calibration/batch_uniqueness": 0.9168564222845603,
"calibration/buffer_distribution_entropy": 0.5710273729435295,
"calibration/buffer_entropy_100bins": 0.5281526593776766,
"calibration/buffer_entropy_10bins": 0.5710273729435295,
"calibration/buffer_entropy_50bins": 0.601999214873284,
"calibration/confidence_entropy": 0.5454837996401404,
"calibration/coverage@0%": 0.0020887728459530026,
"calibration/coverage@1%": 0.0020887728459530026,
"calibration/coverage@10%": 0.06197192161099606,
"calibration/coverage@15%": 0.11850014659270083,
"calibration/coverage@20%": 0.3127652813079301,
"calibration/coverage@25%": 0.5564344484546024,
"calibration/coverage@30%": 0.6621659907782901,
"calibration/coverage@5%": 0.02454308093994778,
"calibration/ece": 0.11688668618444734,
"calibration/mean_confidence": 0.670672226113574,
"calibration/prompt_uniqueness": 0.8470008776094577,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.020486111111111115,
"completions/max_length": 3877.4,
"completions/max_terminated_length": 3877.4,
"completions/mean_length": 578.4817749023438,
"completions/mean_terminated_length": 590.725341796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 146.4,
"epoch": 0.08399895001312484,
"grad_norm": 0.0005679262103512883,
"learning_rate": 4.166666666666667e-06,
"loss": -0.0128,
"num_tokens": 61298061.0,
"reward": 0.9099455237388611,
"reward_std": 0.1839153379201889,
"rewards/accuracy_reward": 0.61875,
"rewards/brier_reward": 0.7439757466316224,
"rewards/confidence_uniqueness_reward": 0.8963147759437561,
"rewards/format_reward": 0.9782118082046509,
"rewards/frontier_aurc_reward": -0.0029043381568044425,
"rewards/frontier_coverage_0": -0.01887217308394611,
"rewards/frontier_coverage_1": -0.01887217308394611,
"rewards/frontier_coverage_10": -0.01887217308394611,
"rewards/frontier_coverage_15": -0.01887217308394611,
"rewards/frontier_coverage_20": -0.01887217308394611,
"rewards/frontier_coverage_25": -0.01887217308394611,
"rewards/frontier_coverage_5": -0.01887217308394611,
"rewards/frontier_ece_reward": 0.02471662126481533,
"rewards/frontier_entropy_batch_reward": -0.5334846794605255,
"signal/accuracy_reward/centered_abs_mean": 0.21307508647441864,
"signal/accuracy_reward/group_bin_occupancy": 0.21701388888888892,
"signal/accuracy_reward/group_std_mean": 0.27209571599960325,
"signal/accuracy_reward/group_zero_std_frac": 0.2638888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10653754323720932,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10653754323720932,
"signal/advantage_abs_mean": 0.14091622233390808,
"signal/advantage_pre_scale_abs_mean": 0.14091622233390808,
"signal/advantage_pre_scale_std": 0.20204322636127472,
"signal/advantage_std": 0.20204322636127472,
"signal/brier_reward/centered_abs_mean": 0.15218718945980073,
"signal/brier_reward/group_bin_occupancy": 0.8690972222222222,
"signal/brier_reward/group_std_mean": 0.19625934958457947,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015218720026314258,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015218720026314258,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06924531385302543,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7503472222222223,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09717238694429398,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0069245313294231895,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0069245313294231895,
"signal/format_reward/centered_abs_mean": 0.03473849855363369,
"signal/format_reward/group_bin_occupancy": 0.15416666666666665,
"signal/format_reward/group_std_mean": 0.060436099767684937,
"signal/format_reward/group_zero_std_frac": 0.7666666626930236,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.017369249276816844,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.017369249276816844,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016358074499294162,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7302083333333333,
"signal/frontier_aurc_reward/group_std_mean": 0.002670387364923954,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.044759276031982e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.044759276031982e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.12218185663223266,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8847222222222223,
"signal/frontier_coverage_0/group_std_mean": 0.16369088590145112,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001527273189276457,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001527273189276457,
"signal/frontier_coverage_1/centered_abs_mean": 0.12218185663223266,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8847222222222223,
"signal/frontier_coverage_1/group_std_mean": 0.16369088590145112,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001527273189276457,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001527273189276457,
"signal/frontier_coverage_10/centered_abs_mean": 0.12218185663223266,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8847222222222223,
"signal/frontier_coverage_10/group_std_mean": 0.16369088590145112,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001527273189276457,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001527273189276457,
"signal/frontier_coverage_15/centered_abs_mean": 0.12218185663223266,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8847222222222223,
"signal/frontier_coverage_15/group_std_mean": 0.16369088590145112,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001527273189276457,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001527273189276457,
"signal/frontier_coverage_20/centered_abs_mean": 0.12218185663223266,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8847222222222223,
"signal/frontier_coverage_20/group_std_mean": 0.16369088590145112,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001527273189276457,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001527273189276457,
"signal/frontier_coverage_25/centered_abs_mean": 0.12218185663223266,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8847222222222223,
"signal/frontier_coverage_25/group_std_mean": 0.16369088590145112,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001527273189276457,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001527273189276457,
"signal/frontier_coverage_5/centered_abs_mean": 0.12218185663223266,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8847222222222223,
"signal/frontier_coverage_5/group_std_mean": 0.16369088590145112,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001527273189276457,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001527273189276457,
"signal/frontier_ece_reward/centered_abs_mean": 0.06000325083732605,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6788194444444444,
"signal/frontier_ece_reward/group_std_mean": 0.08099779933691025,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006000325083732605,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006000325083732605,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4390486657619476,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7121527777777776,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.500614058971405,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04390486851334572,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04390486851334572,
"step": 35
},
{
"calibration/aurc": 0.30461219130156075,
"calibration/batch_distribution_entropy": 0.97860829400174,
"calibration/batch_entropy_100bins": 0.9486484674822325,
"calibration/batch_entropy_10bins": 0.97860829400174,
"calibration/batch_entropy_50bins": 0.9682282190043502,
"calibration/batch_uniqueness": 0.9518603939567309,
"calibration/buffer_distribution_entropy": 0.6674029515556968,
"calibration/buffer_entropy_100bins": 0.6329905407674977,
"calibration/buffer_entropy_10bins": 0.6674029515556968,
"calibration/buffer_entropy_50bins": 0.696431956314964,
"calibration/confidence_entropy": 0.5217791211913715,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.06248734222348997,
"calibration/coverage@20%": 0.11387957878723051,
"calibration/coverage@25%": 0.34223374312873,
"calibration/coverage@30%": 0.5964517660517661,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.21549805876997757,
"calibration/mean_confidence": 0.5352754370110747,
"calibration/prompt_uniqueness": 0.8878912054727662,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.020572916666666653,
"completions/max_length": 3790.6,
"completions/max_terminated_length": 3790.6,
"completions/mean_length": 589.0677978515625,
"completions/mean_terminated_length": 601.5003784179687,
"completions/min_length": 0.0,
"completions/min_terminated_length": 152.4,
"epoch": 0.09599880001499982,
"grad_norm": 0.0004772288375534117,
"learning_rate": 4.761904761904762e-06,
"loss": -0.0183,
"num_tokens": 71203642.0,
"reward": 0.9513100028038025,
"reward_std": 0.17211353182792663,
"rewards/accuracy_reward": 0.6349826335906983,
"rewards/brier_reward": 0.7103789806365967,
"rewards/confidence_uniqueness_reward": 0.9307293176651001,
"rewards/format_reward": 0.97734375,
"rewards/frontier_aurc_reward": -0.002656676573678851,
"rewards/frontier_coverage_0": -0.04805164374411106,
"rewards/frontier_coverage_1": -0.04805164374411106,
"rewards/frontier_coverage_10": -0.04805164374411106,
"rewards/frontier_coverage_15": -0.04805164374411106,
"rewards/frontier_coverage_20": -0.04805164374411106,
"rewards/frontier_coverage_25": -0.04805164374411106,
"rewards/frontier_coverage_5": -0.04805164374411106,
"rewards/frontier_ece_reward": 0.015457052178680897,
"rewards/frontier_entropy_batch_reward": -0.16271998584270478,
"signal/accuracy_reward/centered_abs_mean": 0.19670681655406952,
"signal/accuracy_reward/group_bin_occupancy": 0.21631944444444445,
"signal/accuracy_reward/group_std_mean": 0.2593521386384964,
"signal/accuracy_reward/group_zero_std_frac": 0.2694444447755814,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09835340827703476,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09835340827703476,
"signal/advantage_abs_mean": 0.12720865309238433,
"signal/advantage_pre_scale_abs_mean": 0.12720865309238433,
"signal/advantage_pre_scale_std": 0.19325639307498932,
"signal/advantage_std": 0.19325639307498932,
"signal/brier_reward/centered_abs_mean": 0.21643259525299072,
"signal/brier_reward/group_bin_occupancy": 0.9309027777777779,
"signal/brier_reward/group_std_mean": 0.2640606015920639,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02164325937628746,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02164325937628746,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.047277380526065824,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7392361111111112,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07938017547130585,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004727738164365292,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004727738164365292,
"signal/format_reward/centered_abs_mean": 0.03805881068110466,
"signal/format_reward/group_bin_occupancy": 0.15902777777777777,
"signal/format_reward/group_std_mean": 0.06856417283415794,
"signal/format_reward/group_zero_std_frac": 0.7277777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01902940534055233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01902940534055233,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017690456472337246,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6746527777777778,
"signal/frontier_aurc_reward/group_std_mean": 0.00304986541159451,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.211307037214283e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.211307037214283e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.23642539083957673,
"signal/frontier_coverage_0/group_bin_occupancy": 0.9170138888888889,
"signal/frontier_coverage_0/group_std_mean": 0.3045207381248474,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002955317497253418,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002955317497253418,
"signal/frontier_coverage_1/centered_abs_mean": 0.23642539083957673,
"signal/frontier_coverage_1/group_bin_occupancy": 0.9170138888888889,
"signal/frontier_coverage_1/group_std_mean": 0.3045207381248474,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002955317497253418,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002955317497253418,
"signal/frontier_coverage_10/centered_abs_mean": 0.23642539083957673,
"signal/frontier_coverage_10/group_bin_occupancy": 0.9170138888888889,
"signal/frontier_coverage_10/group_std_mean": 0.3045207381248474,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002955317497253418,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002955317497253418,
"signal/frontier_coverage_15/centered_abs_mean": 0.23642539083957673,
"signal/frontier_coverage_15/group_bin_occupancy": 0.9170138888888889,
"signal/frontier_coverage_15/group_std_mean": 0.3045207381248474,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002955317497253418,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002955317497253418,
"signal/frontier_coverage_20/centered_abs_mean": 0.23642539083957673,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9170138888888889,
"signal/frontier_coverage_20/group_std_mean": 0.3045207381248474,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002955317497253418,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002955317497253418,
"signal/frontier_coverage_25/centered_abs_mean": 0.23642539083957673,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9170138888888889,
"signal/frontier_coverage_25/group_std_mean": 0.3045207381248474,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002955317497253418,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002955317497253418,
"signal/frontier_coverage_5/centered_abs_mean": 0.23642539083957673,
"signal/frontier_coverage_5/group_bin_occupancy": 0.9170138888888889,
"signal/frontier_coverage_5/group_std_mean": 0.3045207381248474,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002955317497253418,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002955317497253418,
"signal/frontier_ece_reward/centered_abs_mean": 0.06959517598152161,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7576388888888889,
"signal/frontier_ece_reward/group_std_mean": 0.09464571475982667,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006959517952054739,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006959517952054739,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2507960021495819,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.8145833333333332,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32827151417732237,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025079600140452386,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025079600140452386,
"step": 40
},
{
"calibration/aurc": 0.2344727023359694,
"calibration/batch_distribution_entropy": 0.9518525106541613,
"calibration/batch_entropy_100bins": 0.9412130004420993,
"calibration/batch_entropy_10bins": 0.9518525106541613,
"calibration/batch_entropy_50bins": 0.9527945423184636,
"calibration/batch_uniqueness": 0.9461262919998014,
"calibration/buffer_distribution_entropy": 0.7459719739155918,
"calibration/buffer_entropy_100bins": 0.7118780035095448,
"calibration/buffer_entropy_10bins": 0.7459719739155918,
"calibration/buffer_entropy_50bins": 0.7661675140410678,
"calibration/confidence_entropy": 0.4776737674852377,
"calibration/coverage@0%": 0.012736259244119067,
"calibration/coverage@1%": 0.012736259244119067,
"calibration/coverage@10%": 0.02381805343936973,
"calibration/coverage@15%": 0.13136398524975118,
"calibration/coverage@20%": 0.2600392937120426,
"calibration/coverage@25%": 0.6212789086899664,
"calibration/coverage@30%": 0.9968337730870713,
"calibration/coverage@5%": 0.012736259244119067,
"calibration/ece": 0.19628602513678026,
"calibration/mean_confidence": 0.6154520747384761,
"calibration/prompt_uniqueness": 0.8831628620958897,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.023611111111111138,
"completions/max_length": 3450.0,
"completions/max_terminated_length": 3450.0,
"completions/mean_length": 588.0378540039062,
"completions/mean_terminated_length": 602.3594970703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 152.4,
"epoch": 0.1079986500168748,
"grad_norm": 0.00051538908155635,
"learning_rate": 4.909638554216868e-06,
"loss": -0.0195,
"num_tokens": 81113102.0,
"reward": 0.9507439851760864,
"reward_std": 0.1727825313806534,
"rewards/accuracy_reward": 0.6421006917953491,
"rewards/brier_reward": 0.7137632369995117,
"rewards/confidence_uniqueness_reward": 0.9256102681159973,
"rewards/format_reward": 0.97578125,
"rewards/frontier_aurc_reward": -0.002600804064422846,
"rewards/frontier_coverage_0": -0.033070035930722955,
"rewards/frontier_coverage_1": -0.033070035930722955,
"rewards/frontier_coverage_10": -0.033070035930722955,
"rewards/frontier_coverage_15": -0.033070035930722955,
"rewards/frontier_coverage_20": -0.033070035930722955,
"rewards/frontier_coverage_25": -0.033070035930722955,
"rewards/frontier_coverage_5": -0.033070035930722955,
"rewards/frontier_ece_reward": 0.023521846160292625,
"rewards/frontier_entropy_batch_reward": -0.2156035676598549,
"signal/accuracy_reward/centered_abs_mean": 0.19353841245174408,
"signal/accuracy_reward/group_bin_occupancy": 0.21631944444444443,
"signal/accuracy_reward/group_std_mean": 0.2572809010744095,
"signal/accuracy_reward/group_zero_std_frac": 0.2694444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09676920622587204,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09676920622587204,
"signal/advantage_abs_mean": 0.12924040853977203,
"signal/advantage_pre_scale_abs_mean": 0.12924040853977203,
"signal/advantage_pre_scale_std": 0.1953139752149582,
"signal/advantage_std": 0.1953139752149582,
"signal/brier_reward/centered_abs_mean": 0.23464938700199128,
"signal/brier_reward/group_bin_occupancy": 0.8972222222222221,
"signal/brier_reward/group_std_mean": 0.2843640446662903,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02346493937075138,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02346493937075138,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05085535049438476,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7968749999999999,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07848654761910438,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005085535254329443,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005085535254329443,
"signal/format_reward/centered_abs_mean": 0.03865559995174408,
"signal/format_reward/group_bin_occupancy": 0.1545138888888889,
"signal/format_reward/group_std_mean": 0.06427749693393707,
"signal/format_reward/group_zero_std_frac": 0.7638888835906983,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01932779997587204,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01932779997587204,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002126035187393427,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7104166666666668,
"signal/frontier_aurc_reward/group_std_mean": 0.0032929918263107536,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6575440278975294e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6575440278975294e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.23452837765216827,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8694444444444445,
"signal/frontier_coverage_0/group_std_mean": 0.31311619877815244,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002931604813784361,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002931604813784361,
"signal/frontier_coverage_1/centered_abs_mean": 0.23452837765216827,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8694444444444445,
"signal/frontier_coverage_1/group_std_mean": 0.31311619877815244,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002931604813784361,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002931604813784361,
"signal/frontier_coverage_10/centered_abs_mean": 0.23452837765216827,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8694444444444445,
"signal/frontier_coverage_10/group_std_mean": 0.31311619877815244,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002931604813784361,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002931604813784361,
"signal/frontier_coverage_15/centered_abs_mean": 0.23452837765216827,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8694444444444445,
"signal/frontier_coverage_15/group_std_mean": 0.31311619877815244,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002931604813784361,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002931604813784361,
"signal/frontier_coverage_20/centered_abs_mean": 0.23452837765216827,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8694444444444445,
"signal/frontier_coverage_20/group_std_mean": 0.31311619877815244,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002931604813784361,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002931604813784361,
"signal/frontier_coverage_25/centered_abs_mean": 0.23452837765216827,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8694444444444445,
"signal/frontier_coverage_25/group_std_mean": 0.31311619877815244,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002931604813784361,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002931604813784361,
"signal/frontier_coverage_5/centered_abs_mean": 0.23452837765216827,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8694444444444445,
"signal/frontier_coverage_5/group_std_mean": 0.31311619877815244,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002931604813784361,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002931604813784361,
"signal/frontier_ece_reward/centered_abs_mean": 0.08764429241418839,
"signal/frontier_ece_reward/group_bin_occupancy": 0.809375,
"signal/frontier_ece_reward/group_std_mean": 0.11243927627801895,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00876442939043045,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00876442939043045,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3069328278303146,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.8336805555555555,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3807151556015015,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030693282932043077,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030693282932043077,
"step": 45
},
{
"calibration/aurc": 0.40581392002668315,
"calibration/batch_distribution_entropy": 0.9864032887347619,
"calibration/batch_entropy_100bins": 0.9648712886165287,
"calibration/batch_entropy_10bins": 0.9864032887347619,
"calibration/batch_entropy_50bins": 0.9766999165423931,
"calibration/batch_uniqueness": 0.953430626200465,
"calibration/buffer_distribution_entropy": 0.7885463322323418,
"calibration/buffer_entropy_100bins": 0.7637818833081469,
"calibration/buffer_entropy_10bins": 0.7885463322323418,
"calibration/buffer_entropy_50bins": 0.8096014118372388,
"calibration/confidence_entropy": 0.48178658393199997,
"calibration/coverage@0%": 0.00478037274518404,
"calibration/coverage@1%": 0.00478037274518404,
"calibration/coverage@10%": 0.00478037274518404,
"calibration/coverage@15%": 0.01598037274518404,
"calibration/coverage@20%": 0.018613443611325775,
"calibration/coverage@25%": 0.06659603789230381,
"calibration/coverage@30%": 0.11990588262883446,
"calibration/coverage@5%": 0.00478037274518404,
"calibration/ece": 0.2005017801122319,
"calibration/mean_confidence": 0.5256280232531849,
"calibration/prompt_uniqueness": 0.8879285110994667,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017274305555555536,
"completions/max_length": 3381.8,
"completions/max_terminated_length": 3381.8,
"completions/mean_length": 594.59775390625,
"completions/mean_terminated_length": 605.0348510742188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 157.6,
"epoch": 0.11999850001874976,
"grad_norm": 0.0005370384315028787,
"learning_rate": 4.759036144578314e-06,
"loss": -0.0163,
"num_tokens": 91060468.0,
"reward": 0.9574363589286804,
"reward_std": 0.16030279099941253,
"rewards/accuracy_reward": 0.6344617962837219,
"rewards/brier_reward": 0.7066903710365295,
"rewards/confidence_uniqueness_reward": 0.9360496401786804,
"rewards/format_reward": 0.9825520992279053,
"rewards/frontier_aurc_reward": -0.0024419894441962244,
"rewards/frontier_coverage_0": -0.03829344231635332,
"rewards/frontier_coverage_1": -0.03829344231635332,
"rewards/frontier_coverage_10": -0.03829344231635332,
"rewards/frontier_coverage_15": -0.03829344231635332,
"rewards/frontier_coverage_20": -0.03829344231635332,
"rewards/frontier_coverage_25": -0.03829344231635332,
"rewards/frontier_coverage_5": -0.03829344231635332,
"rewards/frontier_ece_reward": 0.015390362963080407,
"rewards/frontier_entropy_batch_reward": -0.1350240170955658,
"signal/accuracy_reward/centered_abs_mean": 0.18443467915058137,
"signal/accuracy_reward/group_bin_occupancy": 0.21006944444444442,
"signal/accuracy_reward/group_std_mean": 0.24160374104976653,
"signal/accuracy_reward/group_zero_std_frac": 0.3194444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09221733957529069,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09221733957529069,
"signal/advantage_abs_mean": 0.11947052925825119,
"signal/advantage_pre_scale_abs_mean": 0.11947052925825119,
"signal/advantage_pre_scale_std": 0.18084822595119476,
"signal/advantage_std": 0.18084822595119476,
"signal/brier_reward/centered_abs_mean": 0.24059803187847137,
"signal/brier_reward/group_bin_occupancy": 0.9128472222222221,
"signal/brier_reward/group_std_mean": 0.2905759453773499,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024059804528951644,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.024059804528951644,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03946094214916229,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7940972222222223,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06758813932538033,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003946094121783972,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003946094121783972,
"signal/format_reward/centered_abs_mean": 0.02996419295668602,
"signal/format_reward/group_bin_occupancy": 0.15381944444444443,
"signal/format_reward/group_std_mean": 0.05656718313694,
"signal/format_reward/group_zero_std_frac": 0.7694444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01498209647834301,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01498209647834301,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017862386535853147,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7190972222222223,
"signal/frontier_aurc_reward/group_std_mean": 0.0027869833167642353,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2327983970171772e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2327983970171772e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.26630950570106504,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8913194444444444,
"signal/frontier_coverage_0/group_std_mean": 0.34513433575630187,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0033288690727204086,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0033288690727204086,
"signal/frontier_coverage_1/centered_abs_mean": 0.26630950570106504,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8913194444444444,
"signal/frontier_coverage_1/group_std_mean": 0.34513433575630187,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033288690727204086,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033288690727204086,
"signal/frontier_coverage_10/centered_abs_mean": 0.26630950570106504,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8913194444444444,
"signal/frontier_coverage_10/group_std_mean": 0.34513433575630187,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033288690727204086,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033288690727204086,
"signal/frontier_coverage_15/centered_abs_mean": 0.26630950570106504,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8913194444444444,
"signal/frontier_coverage_15/group_std_mean": 0.34513433575630187,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033288690727204086,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033288690727204086,
"signal/frontier_coverage_20/centered_abs_mean": 0.26630950570106504,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8913194444444444,
"signal/frontier_coverage_20/group_std_mean": 0.34513433575630187,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033288690727204086,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033288690727204086,
"signal/frontier_coverage_25/centered_abs_mean": 0.26630950570106504,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8913194444444444,
"signal/frontier_coverage_25/group_std_mean": 0.34513433575630187,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0033288690727204086,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033288690727204086,
"signal/frontier_coverage_5/centered_abs_mean": 0.26630950570106504,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8913194444444444,
"signal/frontier_coverage_5/group_std_mean": 0.34513433575630187,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033288690727204086,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033288690727204086,
"signal/frontier_ece_reward/centered_abs_mean": 0.08141317814588547,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8072916666666666,
"signal/frontier_ece_reward/group_std_mean": 0.10631832182407379,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008141317777335644,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008141317777335644,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22239840626716614,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7815972222222223,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2998348593711853,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022239841893315314,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022239841893315314,
"step": 50
},
{
"epoch": 0.11999850001874976,
"eval_calibration/aurc": 0.2109596362293293,
"eval_calibration/batch_distribution_entropy": 0.9226056413086375,
"eval_calibration/batch_entropy_100bins": 0.6951142630838091,
"eval_calibration/batch_entropy_10bins": 0.9226056413086375,
"eval_calibration/batch_entropy_50bins": 0.7795849121323298,
"eval_calibration/batch_uniqueness": 0.8931600654699965,
"eval_calibration/buffer_distribution_entropy": 0.815512391941836,
"eval_calibration/buffer_entropy_100bins": 0.7914134740073022,
"eval_calibration/buffer_entropy_10bins": 0.815512391941836,
"eval_calibration/buffer_entropy_50bins": 0.8336375838667806,
"eval_calibration/confidence_entropy": 0.49500069308362776,
"eval_calibration/coverage@0%": 0.13205645161290322,
"eval_calibration/coverage@1%": 0.13205645161290322,
"eval_calibration/coverage@10%": 0.26915322580645157,
"eval_calibration/coverage@15%": 0.3790322580645162,
"eval_calibration/coverage@20%": 0.6399529569892474,
"eval_calibration/coverage@25%": 0.7288306451612904,
"eval_calibration/coverage@30%": 0.8385416666666666,
"eval_calibration/coverage@5%": 0.13205645161290322,
"eval_calibration/ece": 0.25606512017994004,
"eval_calibration/mean_confidence": 0.5449926770716114,
"eval_calibration/prompt_uniqueness": 0.8931600654699965,
"eval_completions/clipped_ratio": 0.018229166666666668,
"eval_completions/max_length": 2121.5,
"eval_completions/max_terminated_length": 2121.5,
"eval_completions/mean_length": 582.2923787434896,
"eval_completions/mean_terminated_length": 593.1960754394531,
"eval_completions/min_length": 46.5,
"eval_completions/min_terminated_length": 204.0,
"eval_loss": 0.0,
"eval_num_tokens": 91060468.0,
"eval_reward": 0.8969475229581197,
"eval_reward_std": 0.25840714077154797,
"eval_rewards/accuracy_reward": 0.6249999900658926,
"eval_rewards/brier_reward": 0.7243680159250895,
"eval_rewards/confidence_uniqueness_reward": 0.8756765027840933,
"eval_rewards/format_reward": 0.9774305621782938,
"eval_rewards/frontier_aurc_reward": -0.0024169180736256144,
"eval_rewards/frontier_coverage_0": -0.014742235808322826,
"eval_rewards/frontier_coverage_1": -0.014742235808322826,
"eval_rewards/frontier_coverage_10": -0.014742235808322826,
"eval_rewards/frontier_coverage_15": -0.014742235808322826,
"eval_rewards/frontier_coverage_20": -0.014742235808322826,
"eval_rewards/frontier_coverage_25": -0.014742235808322826,
"eval_rewards/frontier_coverage_5": -0.014742235808322826,
"eval_rewards/frontier_ece_reward": 0.015066012740135193,
"eval_rewards/frontier_entropy_batch_reward": -0.6445865134398142,
"eval_runtime": 205.5472,
"eval_samples_per_second": 4.865,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4510633647441864,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.4817399134238561,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2255316823720932,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2255316823720932,
"eval_signal/advantage_abs_mean": 0.21911720434824625,
"eval_signal/advantage_pre_scale_abs_mean": 0.21911720434824625,
"eval_signal/advantage_pre_scale_std": 0.2567944601178169,
"eval_signal/advantage_std": 0.2567944601178169,
"eval_signal/brier_reward/centered_abs_mean": 0.23708807677030563,
"eval_signal/brier_reward/group_bin_occupancy": 0.9201388888888888,
"eval_signal/brier_reward/group_std_mean": 0.2897856483856837,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023708807304501534,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.023708807304501534,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06476977219184239,
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3819444444444444,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1230657051006953,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006476977374404669,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006476977374404669,
"eval_signal/format_reward/centered_abs_mean": 0.043077257461845875,
"eval_signal/format_reward/group_bin_occupancy": 0.19444444444444445,
"eval_signal/format_reward/group_std_mean": 0.10973560561736424,
"eval_signal/format_reward/group_zero_std_frac": 0.4444444527228673,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.021538628730922937,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.021538628730922937,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0020677158997083702,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.7222222222222222,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0035327961280321083,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.584644911015251e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.584644911015251e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.2857043494780858,
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9479166666666666,
"eval_signal/frontier_coverage_0/group_std_mean": 0.39817163348197937,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0035713044150422015,
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0035713044150422015,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.2857043494780858,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9479166666666666,
"eval_signal/frontier_coverage_1/group_std_mean": 0.39817163348197937,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035713044150422015,
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035713044150422015,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.2857043494780858,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9479166666666666,
"eval_signal/frontier_coverage_10/group_std_mean": 0.39817163348197937,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035713044150422015,
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035713044150422015,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.2857043494780858,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9479166666666666,
"eval_signal/frontier_coverage_15/group_std_mean": 0.39817163348197937,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035713044150422015,
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035713044150422015,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.2857043494780858,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9479166666666666,
"eval_signal/frontier_coverage_20/group_std_mean": 0.39817163348197937,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035713044150422015,
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035713044150422015,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2857043494780858,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9479166666666666,
"eval_signal/frontier_coverage_25/group_std_mean": 0.39817163348197937,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035713044150422015,
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035713044150422015,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2857043494780858,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9479166666666666,
"eval_signal/frontier_coverage_5/group_std_mean": 0.39817163348197937,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035713044150422015,
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035713044150422015,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.07295310931901137,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8715277777777777,
"eval_signal/frontier_ece_reward/group_std_mean": 0.0984811931848526,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007295310885335009,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007295310885335009,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3177054176727931,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3194444444444444,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.33345575133959454,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031770541022221245,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031770541022221245,
"eval_steps_per_second": 0.029,
"step": 50
},
{
"calibration/aurc": 0.2694222235390976,
"calibration/batch_distribution_entropy": 0.9743103185657492,
"calibration/batch_entropy_100bins": 0.9569912029449112,
"calibration/batch_entropy_10bins": 0.9743103185657492,
"calibration/batch_entropy_50bins": 0.9695456736926709,
"calibration/batch_uniqueness": 0.9509898009239965,
"calibration/buffer_distribution_entropy": 0.8302606383123452,
"calibration/buffer_entropy_100bins": 0.807123888242816,
"calibration/buffer_entropy_10bins": 0.8302606383123452,
"calibration/buffer_entropy_50bins": 0.8468961491187944,
"calibration/confidence_entropy": 0.5156970864044457,
"calibration/coverage@0%": 0.0036787068283131276,
"calibration/coverage@1%": 0.0036787068283131276,
"calibration/coverage@10%": 0.023101279006790817,
"calibration/coverage@15%": 0.06956951427694946,
"calibration/coverage@20%": 0.3806686646144719,
"calibration/coverage@25%": 0.5535145450606225,
"calibration/coverage@30%": 0.7110186806437707,
"calibration/coverage@5%": 0.0036787068283131276,
"calibration/ece": 0.1878179475943192,
"calibration/mean_confidence": 0.5640014221751124,
"calibration/prompt_uniqueness": 0.8892825716455885,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.018836805555555558,
"completions/max_length": 3432.4,
"completions/max_terminated_length": 3432.4,
"completions/mean_length": 611.9341186523437,
"completions/mean_terminated_length": 623.7887573242188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 145.4,
"epoch": 0.13199835002062474,
"grad_norm": 0.00044835961307398975,
"learning_rate": 4.60843373493976e-06,
"loss": -0.0164,
"num_tokens": 101190525.0,
"reward": 0.9691686749458313,
"reward_std": 0.15596783459186553,
"rewards/accuracy_reward": 0.6544270634651184,
"rewards/brier_reward": 0.728352153301239,
"rewards/confidence_uniqueness_reward": 0.9341305613517761,
"rewards/format_reward": 0.9808159828186035,
"rewards/frontier_aurc_reward": -0.0021917944541200995,
"rewards/frontier_coverage_0": -0.03554604309611022,
"rewards/frontier_coverage_1": -0.03554604309611022,
"rewards/frontier_coverage_10": -0.03554604309611022,
"rewards/frontier_coverage_15": -0.03554604309611022,
"rewards/frontier_coverage_20": -0.03554604309611022,
"rewards/frontier_coverage_25": -0.03554604309611022,
"rewards/frontier_coverage_5": -0.03554604309611022,
"rewards/frontier_ece_reward": 0.015853497385978698,
"rewards/frontier_entropy_batch_reward": -0.13148798942565917,
"signal/accuracy_reward/centered_abs_mean": 0.1805935323238373,
"signal/accuracy_reward/group_bin_occupancy": 0.20694444444444443,
"signal/accuracy_reward/group_std_mean": 0.23551449477672576,
"signal/accuracy_reward/group_zero_std_frac": 0.3444444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09029676616191865,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09029676616191865,
"signal/advantage_abs_mean": 0.1168292984366417,
"signal/advantage_pre_scale_abs_mean": 0.1168292984366417,
"signal/advantage_pre_scale_std": 0.18130592703819276,
"signal/advantage_std": 0.18130592703819276,
"signal/brier_reward/centered_abs_mean": 0.2102464973926544,
"signal/brier_reward/group_bin_occupancy": 0.9072916666666668,
"signal/brier_reward/group_std_mean": 0.2576879024505615,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02102465070784092,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02102465070784092,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.040306436270475386,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7975694444444443,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06543851867318154,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004030643822625279,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004030643822625279,
"signal/format_reward/centered_abs_mean": 0.03095160648226738,
"signal/format_reward/group_bin_occupancy": 0.15138888888888888,
"signal/format_reward/group_std_mean": 0.05426007434725762,
"signal/format_reward/group_zero_std_frac": 0.7888888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01547580324113369,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01547580324113369,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001665117172524333,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7086805555555556,
"signal/frontier_aurc_reward/group_std_mean": 0.0026435004081577064,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.081396560242865e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.081396560242865e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.2305227130651474,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8920138888888889,
"signal/frontier_coverage_0/group_std_mean": 0.30123440027236936,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0028815338853746654,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028815338853746654,
"signal/frontier_coverage_1/centered_abs_mean": 0.2305227130651474,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8920138888888889,
"signal/frontier_coverage_1/group_std_mean": 0.30123440027236936,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028815338853746654,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028815338853746654,
"signal/frontier_coverage_10/centered_abs_mean": 0.2305227130651474,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8920138888888889,
"signal/frontier_coverage_10/group_std_mean": 0.30123440027236936,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028815338853746654,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028815338853746654,
"signal/frontier_coverage_15/centered_abs_mean": 0.2305227130651474,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8920138888888889,
"signal/frontier_coverage_15/group_std_mean": 0.30123440027236936,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028815338853746654,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028815338853746654,
"signal/frontier_coverage_20/centered_abs_mean": 0.2305227130651474,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8920138888888889,
"signal/frontier_coverage_20/group_std_mean": 0.30123440027236936,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028815338853746654,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028815338853746654,
"signal/frontier_coverage_25/centered_abs_mean": 0.2305227130651474,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8920138888888889,
"signal/frontier_coverage_25/group_std_mean": 0.30123440027236936,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028815338853746654,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028815338853746654,
"signal/frontier_coverage_5/centered_abs_mean": 0.2305227130651474,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8920138888888889,
"signal/frontier_coverage_5/group_std_mean": 0.30123440027236936,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028815338853746654,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028815338853746654,
"signal/frontier_ece_reward/centered_abs_mean": 0.06926655918359756,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7784722222222222,
"signal/frontier_ece_reward/group_std_mean": 0.09231365174055099,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006926656048744917,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006926656048744917,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21102777123451233,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.8166666666666667,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.283634626865387,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.021102776750922203,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021102776750922203,
"step": 55
},
{
"calibration/aurc": 0.3307035914636969,
"calibration/batch_distribution_entropy": 0.9677243577600381,
"calibration/batch_entropy_100bins": 0.9571977793903559,
"calibration/batch_entropy_10bins": 0.9677243577600381,
"calibration/batch_entropy_50bins": 0.9681875033490638,
"calibration/batch_uniqueness": 0.9505193455375718,
"calibration/buffer_distribution_entropy": 0.8528815324002004,
"calibration/buffer_entropy_100bins": 0.8344222403886594,
"calibration/buffer_entropy_10bins": 0.8528815324002004,
"calibration/buffer_entropy_50bins": 0.8685746685852462,
"calibration/confidence_entropy": 0.5152009551346997,
"calibration/coverage@0%": 0.01153219045162198,
"calibration/coverage@1%": 0.01153219045162198,
"calibration/coverage@10%": 0.08744842081811413,
"calibration/coverage@15%": 0.22566831610607224,
"calibration/coverage@20%": 0.3026316668914125,
"calibration/coverage@25%": 0.37020823239169576,
"calibration/coverage@30%": 0.43324681560185196,
"calibration/coverage@5%": 0.032998159038009414,
"calibration/ece": 0.19605623757236762,
"calibration/mean_confidence": 0.5866015536543909,
"calibration/prompt_uniqueness": 0.8868937097156987,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015798611111111117,
"completions/max_length": 3490.0,
"completions/max_terminated_length": 3490.0,
"completions/mean_length": 626.966845703125,
"completions/mean_terminated_length": 637.0339233398438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 158.4,
"epoch": 0.14399820002249972,
"grad_norm": 0.00042762517114169896,
"learning_rate": 4.457831325301205e-06,
"loss": -0.0143,
"num_tokens": 111509759.0,
"reward": 0.9568191528320312,
"reward_std": 0.15329523682594298,
"rewards/accuracy_reward": 0.6266492962837219,
"rewards/brier_reward": 0.7438425660133362,
"rewards/confidence_uniqueness_reward": 0.9345135450363159,
"rewards/format_reward": 0.9841145753860474,
"rewards/frontier_aurc_reward": -0.0022769244387745857,
"rewards/frontier_coverage_0": -0.002498930087313056,
"rewards/frontier_coverage_1": -0.002498930087313056,
"rewards/frontier_coverage_10": -0.002498930087313056,
"rewards/frontier_coverage_15": -0.002498930087313056,
"rewards/frontier_coverage_20": -0.002498930087313056,
"rewards/frontier_coverage_25": -0.002498930087313056,
"rewards/frontier_coverage_5": -0.002498930087313056,
"rewards/frontier_ece_reward": 0.02260393425822258,
"rewards/frontier_entropy_batch_reward": -0.18411682844161986,
"signal/accuracy_reward/centered_abs_mean": 0.18255750834941864,
"signal/accuracy_reward/group_bin_occupancy": 0.21145833333333336,
"signal/accuracy_reward/group_std_mean": 0.24254016876220702,
"signal/accuracy_reward/group_zero_std_frac": 0.3083333373069763,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09127875417470932,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09127875417470932,
"signal/advantage_abs_mean": 0.11400031745433807,
"signal/advantage_pre_scale_abs_mean": 0.11400031745433807,
"signal/advantage_pre_scale_std": 0.17615911066532136,
"signal/advantage_std": 0.17615911066532136,
"signal/brier_reward/centered_abs_mean": 0.19489111006259918,
"signal/brier_reward/group_bin_occupancy": 0.8920138888888889,
"signal/brier_reward/group_std_mean": 0.24229688942432404,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019489111378788948,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.019489111378788948,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.037897521257400514,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8170138888888889,
"signal/confidence_uniqueness_reward/group_std_mean": 0.062398982048034665,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003789752395823598,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003789752395823598,
"signal/format_reward/centered_abs_mean": 0.02632921040058136,
"signal/format_reward/group_bin_occupancy": 0.1496527777777778,
"signal/format_reward/group_std_mean": 0.04854804500937462,
"signal/format_reward/group_zero_std_frac": 0.8027777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01316460520029068,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01316460520029068,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019377078860998154,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7121527777777779,
"signal/frontier_aurc_reward/group_std_mean": 0.0030613655224442484,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.422134857624769e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.422134857624769e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.2110768437385559,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8638888888888889,
"signal/frontier_coverage_0/group_std_mean": 0.28098778128623964,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002638460695743561,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002638460695743561,
"signal/frontier_coverage_1/centered_abs_mean": 0.2110768437385559,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8638888888888889,
"signal/frontier_coverage_1/group_std_mean": 0.28098778128623964,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002638460695743561,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002638460695743561,
"signal/frontier_coverage_10/centered_abs_mean": 0.2110768437385559,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8638888888888889,
"signal/frontier_coverage_10/group_std_mean": 0.28098778128623964,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002638460695743561,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002638460695743561,
"signal/frontier_coverage_15/centered_abs_mean": 0.2110768437385559,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8638888888888889,
"signal/frontier_coverage_15/group_std_mean": 0.28098778128623964,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002638460695743561,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002638460695743561,
"signal/frontier_coverage_20/centered_abs_mean": 0.2110768437385559,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8638888888888889,
"signal/frontier_coverage_20/group_std_mean": 0.28098778128623964,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002638460695743561,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002638460695743561,
"signal/frontier_coverage_25/centered_abs_mean": 0.2110768437385559,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8638888888888889,
"signal/frontier_coverage_25/group_std_mean": 0.28098778128623964,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002638460695743561,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002638460695743561,
"signal/frontier_coverage_5/centered_abs_mean": 0.2110768437385559,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8638888888888889,
"signal/frontier_coverage_5/group_std_mean": 0.28098778128623964,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002638460695743561,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002638460695743561,
"signal/frontier_ece_reward/centered_abs_mean": 0.06807545423507691,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7677083333333334,
"signal/frontier_ece_reward/group_std_mean": 0.08898028582334519,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006807545572519303,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006807545572519303,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2613932341337204,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.8180555555555558,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33782604336738586,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02613932266831398,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02613932266831398,
"step": 60
},
{
"calibration/aurc": 0.23300829060517123,
"calibration/batch_distribution_entropy": 0.9831016338957796,
"calibration/batch_entropy_100bins": 0.9632753017365372,
"calibration/batch_entropy_10bins": 0.9831016338957796,
"calibration/batch_entropy_50bins": 0.9759229135714376,
"calibration/batch_uniqueness": 0.9525056216175093,
"calibration/buffer_distribution_entropy": 0.8715169058505172,
"calibration/buffer_entropy_100bins": 0.8564367522033102,
"calibration/buffer_entropy_10bins": 0.8715169058505172,
"calibration/buffer_entropy_50bins": 0.8863222147297156,
"calibration/confidence_entropy": 0.48784240565714115,
"calibration/coverage@0%": 0.02786145806735325,
"calibration/coverage@1%": 0.02786145806735325,
"calibration/coverage@10%": 0.1591952861236417,
"calibration/coverage@15%": 0.4360120187533691,
"calibration/coverage@20%": 0.5692992045422305,
"calibration/coverage@25%": 0.6198779772037336,
"calibration/coverage@30%": 0.7344086021505376,
"calibration/coverage@5%": 0.04193082921950804,
"calibration/ece": 0.19008310139291312,
"calibration/mean_confidence": 0.5527640346426371,
"calibration/prompt_uniqueness": 0.87965394467687,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010416666666666675,
"completions/max_length": 3113.2,
"completions/max_terminated_length": 3113.2,
"completions/mean_length": 610.5935913085938,
"completions/mean_terminated_length": 616.9865234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 147.4,
"epoch": 0.1559980500243747,
"grad_norm": 0.0005515015218406916,
"learning_rate": 4.307228915662651e-06,
"loss": -0.0085,
"num_tokens": 121637845.0,
"reward": 0.9779425144195557,
"reward_std": 0.13538099378347396,
"rewards/accuracy_reward": 0.6507812380790711,
"rewards/brier_reward": 0.7501181960105896,
"rewards/confidence_uniqueness_reward": 0.9430691480636597,
"rewards/format_reward": 0.9895833373069763,
"rewards/frontier_aurc_reward": -0.0019629735965281726,
"rewards/frontier_coverage_0": -0.008767739811446518,
"rewards/frontier_coverage_1": -0.008767739811446518,
"rewards/frontier_coverage_10": -0.008767739811446518,
"rewards/frontier_coverage_15": -0.008767739811446518,
"rewards/frontier_coverage_20": -0.008767739811446518,
"rewards/frontier_coverage_25": -0.008767739811446518,
"rewards/frontier_coverage_5": -0.008767739811446518,
"rewards/frontier_ece_reward": 0.020754358358681203,
"rewards/frontier_entropy_batch_reward": -0.12842243015766144,
"signal/accuracy_reward/centered_abs_mean": 0.167333984375,
"signal/accuracy_reward/group_bin_occupancy": 0.20243055555555553,
"signal/accuracy_reward/group_std_mean": 0.2216338872909546,
"signal/accuracy_reward/group_zero_std_frac": 0.3805555641651154,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0836669921875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0836669921875,
"signal/advantage_abs_mean": 0.09993359893560409,
"signal/advantage_pre_scale_abs_mean": 0.09993359893560409,
"signal/advantage_pre_scale_std": 0.15722057819366456,
"signal/advantage_std": 0.15722057819366456,
"signal/brier_reward/centered_abs_mean": 0.19362751245498658,
"signal/brier_reward/group_bin_occupancy": 0.875,
"signal/brier_reward/group_std_mean": 0.24217391312122344,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019362751767039298,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.019362751767039298,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028328100219368935,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8430555555555556,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04791910648345947,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028328101616352797,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028328101616352797,
"signal/format_reward/centered_abs_mean": 0.01808810755610466,
"signal/format_reward/group_bin_occupancy": 0.1440972222222222,
"signal/format_reward/group_std_mean": 0.03565124273300171,
"signal/format_reward/group_zero_std_frac": 0.8472222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00904405377805233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00904405377805233,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016498573124408722,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6878472222222223,
"signal/frontier_aurc_reward/group_std_mean": 0.0026893588714301587,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0623216914827936e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0623216914827936e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.2315441280603409,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8649305555555555,
"signal/frontier_coverage_0/group_std_mean": 0.3030831813812256,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002894301526248455,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002894301526248455,
"signal/frontier_coverage_1/centered_abs_mean": 0.2315441280603409,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8649305555555555,
"signal/frontier_coverage_1/group_std_mean": 0.3030831813812256,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002894301526248455,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002894301526248455,
"signal/frontier_coverage_10/centered_abs_mean": 0.2315441280603409,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8649305555555555,
"signal/frontier_coverage_10/group_std_mean": 0.3030831813812256,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002894301526248455,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002894301526248455,
"signal/frontier_coverage_15/centered_abs_mean": 0.2315441280603409,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8649305555555555,
"signal/frontier_coverage_15/group_std_mean": 0.3030831813812256,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002894301526248455,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002894301526248455,
"signal/frontier_coverage_20/centered_abs_mean": 0.2315441280603409,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8649305555555555,
"signal/frontier_coverage_20/group_std_mean": 0.3030831813812256,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002894301526248455,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002894301526248455,
"signal/frontier_coverage_25/centered_abs_mean": 0.2315441280603409,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8649305555555555,
"signal/frontier_coverage_25/group_std_mean": 0.3030831813812256,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002894301526248455,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002894301526248455,
"signal/frontier_coverage_5/centered_abs_mean": 0.2315441280603409,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8649305555555555,
"signal/frontier_coverage_5/group_std_mean": 0.3030831813812256,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002894301526248455,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002894301526248455,
"signal/frontier_ece_reward/centered_abs_mean": 0.06531385183334351,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7784722222222221,
"signal/frontier_ece_reward/group_std_mean": 0.08503876328468322,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006531385611742735,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006531385611742735,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.20490280091762542,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7586805555555556,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.27364385724067686,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02049028016626835,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02049028016626835,
"step": 65
},
{
"calibration/aurc": 0.29206435720557744,
"calibration/batch_distribution_entropy": 0.967741577432348,
"calibration/batch_entropy_100bins": 0.9547615908625235,
"calibration/batch_entropy_10bins": 0.967741577432348,
"calibration/batch_entropy_50bins": 0.9660211465920273,
"calibration/batch_uniqueness": 0.9491694668612134,
"calibration/buffer_distribution_entropy": 0.8853528168662106,
"calibration/buffer_entropy_100bins": 0.8734316453290203,
"calibration/buffer_entropy_10bins": 0.8853528168662106,
"calibration/buffer_entropy_50bins": 0.8995535410048392,
"calibration/confidence_entropy": 0.47921780511639545,
"calibration/coverage@0%": 0.008454719126847999,
"calibration/coverage@1%": 0.008454719126847999,
"calibration/coverage@10%": 0.10795939337610916,
"calibration/coverage@15%": 0.16163093522390154,
"calibration/coverage@20%": 0.20303708239739757,
"calibration/coverage@25%": 0.27123720908418275,
"calibration/coverage@30%": 0.46396145357735036,
"calibration/coverage@5%": 0.008454719126847999,
"calibration/ece": 0.1539259917359437,
"calibration/mean_confidence": 0.590760119025805,
"calibration/prompt_uniqueness": 0.8780244614506524,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01163194444444442,
"completions/max_length": 3017.8,
"completions/max_terminated_length": 3017.8,
"completions/mean_length": 598.3328247070312,
"completions/mean_terminated_length": 605.3543090820312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 137.2,
"epoch": 0.16799790002624967,
"grad_norm": 0.00044609271571971476,
"learning_rate": 4.156626506024097e-06,
"loss": -0.01,
"num_tokens": 131608783.0,
"reward": 0.964812970161438,
"reward_std": 0.13441329300403596,
"rewards/accuracy_reward": 0.6355902791023255,
"rewards/brier_reward": 0.7468560457229614,
"rewards/confidence_uniqueness_reward": 0.9387478590011596,
"rewards/format_reward": 0.9881944537162781,
"rewards/frontier_aurc_reward": -0.002175836288370192,
"rewards/frontier_coverage_0": 0.000884566456079483,
"rewards/frontier_coverage_1": 0.000884566456079483,
"rewards/frontier_coverage_10": 0.000884566456079483,
"rewards/frontier_coverage_15": 0.000884566456079483,
"rewards/frontier_coverage_20": 0.000884566456079483,
"rewards/frontier_coverage_25": 0.000884566456079483,
"rewards/frontier_coverage_5": 0.000884566456079483,
"rewards/frontier_ece_reward": 0.02232353687286377,
"rewards/frontier_entropy_batch_reward": -0.17922367453575133,
"signal/accuracy_reward/centered_abs_mean": 0.16927083134651183,
"signal/accuracy_reward/group_bin_occupancy": 0.19895833333333335,
"signal/accuracy_reward/group_std_mean": 0.2159910023212433,
"signal/accuracy_reward/group_zero_std_frac": 0.40833333134651184,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08463541567325591,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08463541567325591,
"signal/advantage_abs_mean": 0.10277295261621475,
"signal/advantage_pre_scale_abs_mean": 0.10277295261621475,
"signal/advantage_pre_scale_std": 0.16154861450195312,
"signal/advantage_std": 0.16154861450195312,
"signal/brier_reward/centered_abs_mean": 0.18858011364936828,
"signal/brier_reward/group_bin_occupancy": 0.8704861111111111,
"signal/brier_reward/group_std_mean": 0.23463993072509765,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018858011066913604,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.018858011066913604,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03243453465402126,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8458333333333334,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05219922661781311,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003243453614413738,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003243453614413738,
"signal/format_reward/centered_abs_mean": 0.01965060755610466,
"signal/format_reward/group_bin_occupancy": 0.14340277777777777,
"signal/format_reward/group_std_mean": 0.03690010011196136,
"signal/format_reward/group_zero_std_frac": 0.8527777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00982530377805233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00982530377805233,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001957321958616376,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6996527777777778,
"signal/frontier_aurc_reward/group_std_mean": 0.003007865697145462,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.446652579237707e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.446652579237707e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.2180919259786606,
"signal/frontier_coverage_0/group_bin_occupancy": 0.846875,
"signal/frontier_coverage_0/group_std_mean": 0.2869709312915802,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027261491399258376,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027261491399258376,
"signal/frontier_coverage_1/centered_abs_mean": 0.2180919259786606,
"signal/frontier_coverage_1/group_bin_occupancy": 0.846875,
"signal/frontier_coverage_1/group_std_mean": 0.2869709312915802,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027261491399258376,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027261491399258376,
"signal/frontier_coverage_10/centered_abs_mean": 0.2180919259786606,
"signal/frontier_coverage_10/group_bin_occupancy": 0.846875,
"signal/frontier_coverage_10/group_std_mean": 0.2869709312915802,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027261491399258376,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027261491399258376,
"signal/frontier_coverage_15/centered_abs_mean": 0.2180919259786606,
"signal/frontier_coverage_15/group_bin_occupancy": 0.846875,
"signal/frontier_coverage_15/group_std_mean": 0.2869709312915802,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027261491399258376,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027261491399258376,
"signal/frontier_coverage_20/centered_abs_mean": 0.2180919259786606,
"signal/frontier_coverage_20/group_bin_occupancy": 0.846875,
"signal/frontier_coverage_20/group_std_mean": 0.2869709312915802,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027261491399258376,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027261491399258376,
"signal/frontier_coverage_25/centered_abs_mean": 0.2180919259786606,
"signal/frontier_coverage_25/group_bin_occupancy": 0.846875,
"signal/frontier_coverage_25/group_std_mean": 0.2869709312915802,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027261491399258376,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027261491399258376,
"signal/frontier_coverage_5/centered_abs_mean": 0.2180919259786606,
"signal/frontier_coverage_5/group_bin_occupancy": 0.846875,
"signal/frontier_coverage_5/group_std_mean": 0.2869709312915802,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027261491399258376,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027261491399258376,
"signal/frontier_ece_reward/centered_abs_mean": 0.06389048919081688,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7607638888888889,
"signal/frontier_ece_reward/group_std_mean": 0.08154775202274323,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006389048788696528,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006389048788696528,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25127990543842316,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.775,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3219525694847107,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025127990543842314,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025127990543842314,
"step": 70
},
{
"calibration/aurc": 0.24865885558565473,
"calibration/batch_distribution_entropy": 0.96489969481927,
"calibration/batch_entropy_100bins": 0.9516288732160346,
"calibration/batch_entropy_10bins": 0.96489969481927,
"calibration/batch_entropy_50bins": 0.9641975783352297,
"calibration/batch_uniqueness": 0.948999975102953,
"calibration/buffer_distribution_entropy": 0.8949759127137261,
"calibration/buffer_entropy_100bins": 0.8868731831868493,
"calibration/buffer_entropy_10bins": 0.8949759127137261,
"calibration/buffer_entropy_50bins": 0.9096876693511307,
"calibration/confidence_entropy": 0.5163309735163633,
"calibration/coverage@0%": 0.00838168656056587,
"calibration/coverage@1%": 0.00838168656056587,
"calibration/coverage@10%": 0.1720974248452697,
"calibration/coverage@15%": 0.24661665561450047,
"calibration/coverage@20%": 0.3965249226348364,
"calibration/coverage@25%": 0.6515518841034948,
"calibration/coverage@30%": 0.7176282051282051,
"calibration/coverage@5%": 0.016715019893899206,
"calibration/ece": 0.18913358089517085,
"calibration/mean_confidence": 0.5699598192734223,
"calibration/prompt_uniqueness": 0.8794210313331021,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.005902777777777768,
"completions/max_length": 2728.0,
"completions/max_terminated_length": 2728.0,
"completions/mean_length": 617.7827270507812,
"completions/mean_terminated_length": 621.47099609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 182.2,
"epoch": 0.17999775002812465,
"grad_norm": 0.0004136954667046666,
"learning_rate": 4.006024096385543e-06,
"loss": -0.0037,
"num_tokens": 141790536.0,
"reward": 0.9999241590499878,
"reward_std": 0.12385457307100296,
"rewards/accuracy_reward": 0.6986111044883728,
"rewards/brier_reward": 0.764533269405365,
"rewards/confidence_uniqueness_reward": 0.9451632022857666,
"rewards/format_reward": 0.9940972208976746,
"rewards/frontier_aurc_reward": -0.001692651305347681,
"rewards/frontier_coverage_0": -0.03404254494234919,
"rewards/frontier_coverage_1": -0.03404254494234919,
"rewards/frontier_coverage_10": -0.03404254494234919,
"rewards/frontier_coverage_15": -0.03404254494234919,
"rewards/frontier_coverage_20": -0.03404254494234919,
"rewards/frontier_coverage_25": -0.03404254494234919,
"rewards/frontier_coverage_5": -0.03404254494234919,
"rewards/frontier_ece_reward": 0.017354899458587168,
"rewards/frontier_entropy_batch_reward": -0.1613529622554779,
"signal/accuracy_reward/centered_abs_mean": 0.15860459804534913,
"signal/accuracy_reward/group_bin_occupancy": 0.2,
"signal/accuracy_reward/group_std_mean": 0.21018220484256744,
"signal/accuracy_reward/group_zero_std_frac": 0.4000000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07930229902267456,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07930229902267456,
"signal/advantage_abs_mean": 0.09171124696731567,
"signal/advantage_pre_scale_abs_mean": 0.09171124696731567,
"signal/advantage_pre_scale_std": 0.1453747808933258,
"signal/advantage_std": 0.1453747808933258,
"signal/brier_reward/centered_abs_mean": 0.16959642767906188,
"signal/brier_reward/group_bin_occupancy": 0.8746527777777778,
"signal/brier_reward/group_std_mean": 0.21293676793575286,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016959642991423607,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016959642991423607,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023114091902971267,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8760416666666668,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03923774063587189,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023114092415198683,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023114092415198683,
"signal/format_reward/centered_abs_mean": 0.010861545195803046,
"signal/format_reward/group_bin_occupancy": 0.1392361111111111,
"signal/format_reward/group_std_mean": 0.0244428563863039,
"signal/format_reward/group_zero_std_frac": 0.8861111283302308,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005430772597901523,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005430772597901523,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014858563197776675,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7024305555555556,
"signal/frontier_aurc_reward/group_std_mean": 0.0023568171076476575,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.857320366980275e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.857320366980275e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.20621402859687804,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8604166666666668,
"signal/frontier_coverage_0/group_std_mean": 0.2692394435405731,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00257767541334033,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00257767541334033,
"signal/frontier_coverage_1/centered_abs_mean": 0.20621402859687804,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8604166666666668,
"signal/frontier_coverage_1/group_std_mean": 0.2692394435405731,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00257767541334033,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00257767541334033,
"signal/frontier_coverage_10/centered_abs_mean": 0.20621402859687804,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8604166666666668,
"signal/frontier_coverage_10/group_std_mean": 0.2692394435405731,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00257767541334033,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00257767541334033,
"signal/frontier_coverage_15/centered_abs_mean": 0.20621402859687804,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8604166666666668,
"signal/frontier_coverage_15/group_std_mean": 0.2692394435405731,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00257767541334033,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00257767541334033,
"signal/frontier_coverage_20/centered_abs_mean": 0.20621402859687804,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8604166666666668,
"signal/frontier_coverage_20/group_std_mean": 0.2692394435405731,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00257767541334033,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00257767541334033,
"signal/frontier_coverage_25/centered_abs_mean": 0.20621402859687804,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8604166666666668,
"signal/frontier_coverage_25/group_std_mean": 0.2692394435405731,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00257767541334033,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00257767541334033,
"signal/frontier_coverage_5/centered_abs_mean": 0.20621402859687804,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8604166666666668,
"signal/frontier_coverage_5/group_std_mean": 0.2692394435405731,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00257767541334033,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00257767541334033,
"signal/frontier_ece_reward/centered_abs_mean": 0.057116496562957766,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7486111111111111,
"signal/frontier_ece_reward/group_std_mean": 0.07407085299491882,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00571164982393384,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00571164982393384,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23405362963676452,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7993055555555555,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3055886387825012,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023405364155769347,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023405364155769347,
"step": 75
},
{
"calibration/aurc": 0.2076444932942266,
"calibration/batch_distribution_entropy": 0.9566517910549462,
"calibration/batch_entropy_100bins": 0.9491858911049785,
"calibration/batch_entropy_10bins": 0.9566517910549462,
"calibration/batch_entropy_50bins": 0.9586329550903072,
"calibration/batch_uniqueness": 0.9478874863011697,
"calibration/buffer_distribution_entropy": 0.9042071878672668,
"calibration/buffer_entropy_100bins": 0.8985727602789755,
"calibration/buffer_entropy_10bins": 0.9042071878672668,
"calibration/buffer_entropy_50bins": 0.9185937542698432,
"calibration/confidence_entropy": 0.49120686633309446,
"calibration/coverage@0%": 0.019409886968950837,
"calibration/coverage@1%": 0.019409886968950837,
"calibration/coverage@10%": 0.17595065074395838,
"calibration/coverage@15%": 0.5099854214864996,
"calibration/coverage@20%": 0.5846423011494986,
"calibration/coverage@25%": 0.6973665942443268,
"calibration/coverage@30%": 0.7510149662143296,
"calibration/coverage@5%": 0.11041733099983801,
"calibration/ece": 0.18258882314927863,
"calibration/mean_confidence": 0.6132432048852545,
"calibration/prompt_uniqueness": 0.8730205259179573,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010243055555555557,
"completions/max_length": 3484.6,
"completions/max_terminated_length": 3484.6,
"completions/mean_length": 672.1552978515625,
"completions/mean_terminated_length": 679.1800170898438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 170.2,
"epoch": 0.19199760002999963,
"grad_norm": 0.0004997280775569379,
"learning_rate": 3.855421686746989e-06,
"loss": -0.0094,
"num_tokens": 152587045.0,
"reward": 0.9794714093208313,
"reward_std": 0.13951779305934905,
"rewards/accuracy_reward": 0.6626736044883728,
"rewards/brier_reward": 0.7578543424606323,
"rewards/confidence_uniqueness_reward": 0.9402774095535278,
"rewards/format_reward": 0.9893229126930236,
"rewards/frontier_aurc_reward": -0.0018948199227452277,
"rewards/frontier_coverage_0": -0.014700169442221522,
"rewards/frontier_coverage_1": -0.014700169442221522,
"rewards/frontier_coverage_10": -0.014700169442221522,
"rewards/frontier_coverage_15": -0.014700169442221522,
"rewards/frontier_coverage_20": -0.014700169442221522,
"rewards/frontier_coverage_25": -0.014700169442221522,
"rewards/frontier_coverage_5": -0.014700169442221522,
"rewards/frontier_ece_reward": 0.019117896631360053,
"rewards/frontier_entropy_batch_reward": -0.16941888332366944,
"signal/accuracy_reward/centered_abs_mean": 0.18274739384651184,
"signal/accuracy_reward/group_bin_occupancy": 0.20833333333333331,
"signal/accuracy_reward/group_std_mean": 0.23848095238208772,
"signal/accuracy_reward/group_zero_std_frac": 0.33333333134651183,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09137369692325592,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09137369692325592,
"signal/advantage_abs_mean": 0.10523971766233445,
"signal/advantage_pre_scale_abs_mean": 0.10523971766233445,
"signal/advantage_pre_scale_std": 0.16219059228897095,
"signal/advantage_std": 0.16219059228897095,
"signal/brier_reward/centered_abs_mean": 0.17205582857131957,
"signal/brier_reward/group_bin_occupancy": 0.8708333333333332,
"signal/brier_reward/group_std_mean": 0.21611034870147705,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017205582931637764,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.017205582931637764,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029724714532494544,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.861111111111111,
"signal/confidence_uniqueness_reward/group_std_mean": 0.046706152707338335,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002972471574321389,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002972471574321389,
"signal/format_reward/centered_abs_mean": 0.01745334193110466,
"signal/format_reward/group_bin_occupancy": 0.14131944444444444,
"signal/format_reward/group_std_mean": 0.03190700151026249,
"signal/format_reward/group_zero_std_frac": 0.8694444417953491,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00872667096555233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00872667096555233,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017476935172453523,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7006944444444445,
"signal/frontier_aurc_reward/group_std_mean": 0.002774294326081872,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1846168237971142e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1846168237971142e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.20781008899211884,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8597222222222223,
"signal/frontier_coverage_0/group_std_mean": 0.27139957547187804,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025976261589676143,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025976261589676143,
"signal/frontier_coverage_1/centered_abs_mean": 0.20781008899211884,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8597222222222223,
"signal/frontier_coverage_1/group_std_mean": 0.27139957547187804,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025976261589676143,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025976261589676143,
"signal/frontier_coverage_10/centered_abs_mean": 0.20781008899211884,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8597222222222223,
"signal/frontier_coverage_10/group_std_mean": 0.27139957547187804,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025976261589676143,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025976261589676143,
"signal/frontier_coverage_15/centered_abs_mean": 0.20781008899211884,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8597222222222223,
"signal/frontier_coverage_15/group_std_mean": 0.27139957547187804,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025976261589676143,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025976261589676143,
"signal/frontier_coverage_20/centered_abs_mean": 0.20781008899211884,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8597222222222223,
"signal/frontier_coverage_20/group_std_mean": 0.27139957547187804,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025976261589676143,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025976261589676143,
"signal/frontier_coverage_25/centered_abs_mean": 0.20781008899211884,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8597222222222223,
"signal/frontier_coverage_25/group_std_mean": 0.27139957547187804,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025976261589676143,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025976261589676143,
"signal/frontier_coverage_5/centered_abs_mean": 0.20781008899211884,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8597222222222223,
"signal/frontier_coverage_5/group_std_mean": 0.27139957547187804,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025976261589676143,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025976261589676143,
"signal/frontier_ece_reward/centered_abs_mean": 0.0579615406692028,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7409722222222223,
"signal/frontier_ece_reward/group_std_mean": 0.07379811108112336,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005796154215931893,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005796154215931893,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24322098791599273,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7798611111111111,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3134605050086975,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02432209961116314,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02432209961116314,
"step": 80
},
{
"calibration/aurc": 0.20271091384745868,
"calibration/batch_distribution_entropy": 0.9737261800910246,
"calibration/batch_entropy_100bins": 0.9591620782628887,
"calibration/batch_entropy_10bins": 0.9737261800910246,
"calibration/batch_entropy_50bins": 0.9716196064375529,
"calibration/batch_uniqueness": 0.9514730500511689,
"calibration/buffer_distribution_entropy": 0.9112810148129586,
"calibration/buffer_entropy_100bins": 0.9083171951473498,
"calibration/buffer_entropy_10bins": 0.9112810148129586,
"calibration/buffer_entropy_50bins": 0.9257044262181973,
"calibration/confidence_entropy": 0.5057748742528816,
"calibration/coverage@0%": 0.01730324074074074,
"calibration/coverage@1%": 0.01730324074074074,
"calibration/coverage@10%": 0.2049778473780437,
"calibration/coverage@15%": 0.3992730494612039,
"calibration/coverage@20%": 0.5388349573395385,
"calibration/coverage@25%": 0.7145227360868722,
"calibration/coverage@30%": 0.8611193783068783,
"calibration/coverage@5%": 0.0400214947089947,
"calibration/ece": 0.1378626115809829,
"calibration/mean_confidence": 0.5521758505614638,
"calibration/prompt_uniqueness": 0.8702453560167897,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.005555555555555536,
"completions/max_length": 3238.0,
"completions/max_terminated_length": 3238.0,
"completions/mean_length": 672.5680541992188,
"completions/mean_terminated_length": 676.3651611328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 220.4,
"epoch": 0.2039974500318746,
"grad_norm": 0.00044680043356493115,
"learning_rate": 3.7048192771084342e-06,
"loss": -0.0043,
"num_tokens": 163422229.0,
"reward": 0.9934585094451904,
"reward_std": 0.1226073071360588,
"rewards/accuracy_reward": 0.6869791746139526,
"rewards/brier_reward": 0.7851580858230591,
"rewards/confidence_uniqueness_reward": 0.9424677133560181,
"rewards/format_reward": 0.9941840291023254,
"rewards/frontier_aurc_reward": -0.0016598706366494297,
"rewards/frontier_coverage_0": -0.004633589053992182,
"rewards/frontier_coverage_1": -0.004633589053992182,
"rewards/frontier_coverage_10": -0.004633589053992182,
"rewards/frontier_coverage_15": -0.004633589053992182,
"rewards/frontier_coverage_20": -0.004633589053992182,
"rewards/frontier_coverage_25": -0.004633589053992182,
"rewards/frontier_coverage_5": -0.004633589053992182,
"rewards/frontier_ece_reward": 0.02196214161813259,
"rewards/frontier_entropy_batch_reward": -0.21655711829662322,
"signal/accuracy_reward/centered_abs_mean": 0.16119791865348815,
"signal/accuracy_reward/group_bin_occupancy": 0.19965277777777776,
"signal/accuracy_reward/group_std_mean": 0.2105330467224121,
"signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08059895932674407,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08059895932674407,
"signal/advantage_abs_mean": 0.0923902839422226,
"signal/advantage_pre_scale_abs_mean": 0.0923902839422226,
"signal/advantage_pre_scale_std": 0.14465830028057097,
"signal/advantage_std": 0.14465830028057097,
"signal/brier_reward/centered_abs_mean": 0.1523455262184143,
"signal/brier_reward/group_bin_occupancy": 0.851388888888889,
"signal/brier_reward/group_std_mean": 0.1943160504102707,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015234552882611751,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015234552882611751,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02421053908765316,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8864583333333332,
"signal/confidence_uniqueness_reward/group_std_mean": 0.038174081966280936,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024210539646446704,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024210539646446704,
"signal/format_reward/centered_abs_mean": 0.01048719622194767,
"signal/format_reward/group_bin_occupancy": 0.13645833333333332,
"signal/format_reward/group_std_mean": 0.021353743970394135,
"signal/format_reward/group_zero_std_frac": 0.9083333492279053,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005243598110973835,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005243598110973835,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015173830557614564,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6986111111111112,
"signal/frontier_aurc_reward/group_std_mean": 0.0024198783095926045,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8967288997373544e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8967288997373544e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19244979321956635,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8506944444444444,
"signal/frontier_coverage_0/group_std_mean": 0.2510490626096725,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024056224152445792,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024056224152445792,
"signal/frontier_coverage_1/centered_abs_mean": 0.19244979321956635,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8506944444444444,
"signal/frontier_coverage_1/group_std_mean": 0.2510490626096725,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024056224152445792,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024056224152445792,
"signal/frontier_coverage_10/centered_abs_mean": 0.19244979321956635,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8506944444444444,
"signal/frontier_coverage_10/group_std_mean": 0.2510490626096725,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024056224152445792,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024056224152445792,
"signal/frontier_coverage_15/centered_abs_mean": 0.19244979321956635,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8506944444444444,
"signal/frontier_coverage_15/group_std_mean": 0.2510490626096725,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024056224152445792,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024056224152445792,
"signal/frontier_coverage_20/centered_abs_mean": 0.19244979321956635,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8506944444444444,
"signal/frontier_coverage_20/group_std_mean": 0.2510490626096725,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024056224152445792,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024056224152445792,
"signal/frontier_coverage_25/centered_abs_mean": 0.19244979321956635,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8506944444444444,
"signal/frontier_coverage_25/group_std_mean": 0.2510490626096725,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024056224152445792,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024056224152445792,
"signal/frontier_coverage_5/centered_abs_mean": 0.19244979321956635,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8506944444444444,
"signal/frontier_coverage_5/group_std_mean": 0.2510490626096725,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024056224152445792,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024056224152445792,
"signal/frontier_ece_reward/centered_abs_mean": 0.052489711344242095,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7017361111111111,
"signal/frontier_ece_reward/group_std_mean": 0.06726017668843269,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005248971004039049,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005248971004039049,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2638679683208466,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.8125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3345774471759796,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026386797800660132,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026386797800660132,
"step": 85
},
{
"calibration/aurc": 0.1771991198328447,
"calibration/batch_distribution_entropy": 0.96243817415222,
"calibration/batch_entropy_100bins": 0.9507006712592847,
"calibration/batch_entropy_10bins": 0.96243817415222,
"calibration/batch_entropy_50bins": 0.964868282268785,
"calibration/batch_uniqueness": 0.9487259176376346,
"calibration/buffer_distribution_entropy": 0.9187161356306482,
"calibration/buffer_entropy_100bins": 0.9165996492677836,
"calibration/buffer_entropy_10bins": 0.9187161356306482,
"calibration/buffer_entropy_50bins": 0.9323867334705161,
"calibration/confidence_entropy": 0.5093906412982893,
"calibration/coverage@0%": 0.048757301233618,
"calibration/coverage@1%": 0.048757301233618,
"calibration/coverage@10%": 0.306309071579122,
"calibration/coverage@15%": 0.4915969045669811,
"calibration/coverage@20%": 0.603687003183948,
"calibration/coverage@25%": 0.780469418411813,
"calibration/coverage@30%": 0.8668421125340184,
"calibration/coverage@5%": 0.08637593441360888,
"calibration/ece": 0.16357922386808874,
"calibration/mean_confidence": 0.5690209591292246,
"calibration/prompt_uniqueness": 0.8674022167680248,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0064236111111110935,
"completions/max_length": 3078.6,
"completions/max_terminated_length": 3078.6,
"completions/mean_length": 640.9775268554688,
"completions/mean_terminated_length": 645.139404296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 178.6,
"epoch": 0.2159973000337496,
"grad_norm": 0.0004924891400150955,
"learning_rate": 3.5542168674698798e-06,
"loss": -0.0054,
"num_tokens": 173874962.0,
"reward": 0.9923341274261475,
"reward_std": 0.12305669635534286,
"rewards/accuracy_reward": 0.6828993082046508,
"rewards/brier_reward": 0.777597713470459,
"rewards/confidence_uniqueness_reward": 0.9433488249778748,
"rewards/format_reward": 0.9934895753860473,
"rewards/frontier_aurc_reward": -0.0015883626649156213,
"rewards/frontier_coverage_0": -0.00989127003122121,
"rewards/frontier_coverage_1": -0.00989127003122121,
"rewards/frontier_coverage_10": -0.00989127003122121,
"rewards/frontier_coverage_15": -0.00989127003122121,
"rewards/frontier_coverage_20": -0.00989127003122121,
"rewards/frontier_coverage_25": -0.00989127003122121,
"rewards/frontier_coverage_5": -0.00989127003122121,
"rewards/frontier_ece_reward": 0.018967508152127267,
"rewards/frontier_entropy_batch_reward": -0.18966446816921234,
"signal/accuracy_reward/centered_abs_mean": 0.16197374165058137,
"signal/accuracy_reward/group_bin_occupancy": 0.19791666666666669,
"signal/accuracy_reward/group_std_mean": 0.20919720828533173,
"signal/accuracy_reward/group_zero_std_frac": 0.4166666746139526,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08098687082529069,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08098687082529069,
"signal/advantage_abs_mean": 0.09262900650501252,
"signal/advantage_pre_scale_abs_mean": 0.09262900650501252,
"signal/advantage_pre_scale_std": 0.14502845108509063,
"signal/advantage_std": 0.14502845108509063,
"signal/brier_reward/centered_abs_mean": 0.15718668401241304,
"signal/brier_reward/group_bin_occupancy": 0.8552083333333333,
"signal/brier_reward/group_std_mean": 0.1992782771587372,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01571866814047098,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01571866814047098,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024519116804003715,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.88125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03934002220630646,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024519118014723063,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024519118014723063,
"signal/format_reward/centered_abs_mean": 0.011572265625,
"signal/format_reward/group_bin_occupancy": 0.1378472222222222,
"signal/format_reward/group_std_mean": 0.023632752522826195,
"signal/format_reward/group_zero_std_frac": 0.8972222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0057861328125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0057861328125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014947153860703111,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6850694444444445,
"signal/frontier_aurc_reward/group_std_mean": 0.0023850529454648496,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.868394247139804e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.868394247139804e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1994914710521698,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8565972222222221,
"signal/frontier_coverage_0/group_std_mean": 0.25926323533058165,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024936434347182512,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024936434347182512,
"signal/frontier_coverage_1/centered_abs_mean": 0.1994914710521698,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8565972222222221,
"signal/frontier_coverage_1/group_std_mean": 0.25926323533058165,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024936434347182512,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024936434347182512,
"signal/frontier_coverage_10/centered_abs_mean": 0.1994914710521698,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8565972222222221,
"signal/frontier_coverage_10/group_std_mean": 0.25926323533058165,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024936434347182512,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024936434347182512,
"signal/frontier_coverage_15/centered_abs_mean": 0.1994914710521698,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8565972222222221,
"signal/frontier_coverage_15/group_std_mean": 0.25926323533058165,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024936434347182512,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024936434347182512,
"signal/frontier_coverage_20/centered_abs_mean": 0.1994914710521698,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8565972222222221,
"signal/frontier_coverage_20/group_std_mean": 0.25926323533058165,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024936434347182512,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024936434347182512,
"signal/frontier_coverage_25/centered_abs_mean": 0.1994914710521698,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8565972222222221,
"signal/frontier_coverage_25/group_std_mean": 0.25926323533058165,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024936434347182512,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024936434347182512,
"signal/frontier_coverage_5/centered_abs_mean": 0.1994914710521698,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8565972222222221,
"signal/frontier_coverage_5/group_std_mean": 0.25926323533058165,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024936434347182512,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024936434347182512,
"signal/frontier_ece_reward/centered_abs_mean": 0.050507232546806335,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6986111111111111,
"signal/frontier_ece_reward/group_std_mean": 0.06456724032759667,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005050723347812891,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005050723347812891,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25887452661991117,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7711805555555555,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3302801251411438,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02588745318353176,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02588745318353176,
"step": 90
},
{
"calibration/aurc": 0.21387456839353614,
"calibration/batch_distribution_entropy": 0.9823557824251997,
"calibration/batch_entropy_100bins": 0.9648278783784832,
"calibration/batch_entropy_10bins": 0.9823557824251997,
"calibration/batch_entropy_50bins": 0.9772968018292392,
"calibration/batch_uniqueness": 0.9534788717971289,
"calibration/buffer_distribution_entropy": 0.9254646438274845,
"calibration/buffer_entropy_100bins": 0.9237693855572863,
"calibration/buffer_entropy_10bins": 0.9254646438274845,
"calibration/buffer_entropy_50bins": 0.9382846474644742,
"calibration/confidence_entropy": 0.4893090729142555,
"calibration/coverage@0%": 0.06979967507118581,
"calibration/coverage@1%": 0.10558914875539635,
"calibration/coverage@10%": 0.27119950973638285,
"calibration/coverage@15%": 0.5634953097731239,
"calibration/coverage@20%": 0.6151696116928447,
"calibration/coverage@25%": 0.654349367364747,
"calibration/coverage@30%": 0.7034767670157068,
"calibration/coverage@5%": 0.18697819073206579,
"calibration/ece": 0.20191279563686187,
"calibration/mean_confidence": 0.5199859337619741,
"calibration/prompt_uniqueness": 0.8663527153909893,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004253472222222232,
"completions/max_length": 3003.0,
"completions/max_terminated_length": 3003.0,
"completions/mean_length": 630.5767456054688,
"completions/mean_terminated_length": 633.2605712890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 162.4,
"epoch": 0.22799715003562457,
"grad_norm": 0.00040928201633505523,
"learning_rate": 3.4036144578313257e-06,
"loss": -0.0023,
"num_tokens": 184230886.0,
"reward": 0.9884204149246216,
"reward_std": 0.1153764232993126,
"rewards/accuracy_reward": 0.6644097328186035,
"rewards/brier_reward": 0.7661008715629578,
"rewards/confidence_uniqueness_reward": 0.9478963613510132,
"rewards/format_reward": 0.9957465291023254,
"rewards/frontier_aurc_reward": -0.0016027359291911126,
"rewards/frontier_coverage_0": -0.002308785542845726,
"rewards/frontier_coverage_1": -0.002308785542845726,
"rewards/frontier_coverage_10": -0.002308785542845726,
"rewards/frontier_coverage_15": -0.002308785542845726,
"rewards/frontier_coverage_20": -0.002308785542845726,
"rewards/frontier_coverage_25": -0.002308785542845726,
"rewards/frontier_coverage_5": -0.002308785542845726,
"rewards/frontier_ece_reward": 0.019070269353687764,
"rewards/frontier_entropy_batch_reward": -0.14742431938648223,
"signal/accuracy_reward/centered_abs_mean": 0.14867621660232544,
"signal/accuracy_reward/group_bin_occupancy": 0.196875,
"signal/accuracy_reward/group_std_mean": 0.199493145942688,
"signal/accuracy_reward/group_zero_std_frac": 0.4250000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07433810830116272,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07433810830116272,
"signal/advantage_abs_mean": 0.08577116578817368,
"signal/advantage_pre_scale_abs_mean": 0.08577116578817368,
"signal/advantage_pre_scale_std": 0.13571085333824157,
"signal/advantage_std": 0.13571085333824157,
"signal/brier_reward/centered_abs_mean": 0.16521736681461335,
"signal/brier_reward/group_bin_occupancy": 0.8541666666666667,
"signal/brier_reward/group_std_mean": 0.20886048674583435,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016521737165749072,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016521737165749072,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01989307664334774,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.884375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03198789656162262,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001989307696931064,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001989307696931064,
"signal/format_reward/centered_abs_mean": 0.007590060774236918,
"signal/format_reward/group_bin_occupancy": 0.13506944444444444,
"signal/format_reward/group_std_mean": 0.01698396187275648,
"signal/format_reward/group_zero_std_frac": 0.919444453716278,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003795030387118459,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.003795030387118459,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001562464004382491,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6795138888888889,
"signal/frontier_aurc_reward/group_std_mean": 0.0025261019822210074,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.953080100065563e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.953080100065563e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.2118411511182785,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8520833333333332,
"signal/frontier_coverage_0/group_std_mean": 0.2758490860462189,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026480144821107387,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026480144821107387,
"signal/frontier_coverage_1/centered_abs_mean": 0.2118411511182785,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8520833333333332,
"signal/frontier_coverage_1/group_std_mean": 0.2758490860462189,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026480144821107387,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026480144821107387,
"signal/frontier_coverage_10/centered_abs_mean": 0.2118411511182785,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8520833333333332,
"signal/frontier_coverage_10/group_std_mean": 0.2758490860462189,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026480144821107387,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026480144821107387,
"signal/frontier_coverage_15/centered_abs_mean": 0.2118411511182785,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8520833333333332,
"signal/frontier_coverage_15/group_std_mean": 0.2758490860462189,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026480144821107387,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026480144821107387,
"signal/frontier_coverage_20/centered_abs_mean": 0.2118411511182785,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8520833333333332,
"signal/frontier_coverage_20/group_std_mean": 0.2758490860462189,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026480144821107387,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026480144821107387,
"signal/frontier_coverage_25/centered_abs_mean": 0.2118411511182785,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8520833333333332,
"signal/frontier_coverage_25/group_std_mean": 0.2758490860462189,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026480144821107387,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026480144821107387,
"signal/frontier_coverage_5/centered_abs_mean": 0.2118411511182785,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8520833333333332,
"signal/frontier_coverage_5/group_std_mean": 0.2758490860462189,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026480144821107387,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026480144821107387,
"signal/frontier_ece_reward/centered_abs_mean": 0.05060374662280083,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7100694444444444,
"signal/frontier_ece_reward/group_std_mean": 0.06428168565034867,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0050603746436536316,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0050603746436536316,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22136266827583312,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.763888888888889,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.29468963146209715,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022136268392205238,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022136268392205238,
"step": 95
},
{
"calibration/aurc": 0.167972204724354,
"calibration/batch_distribution_entropy": 0.9864342846502329,
"calibration/batch_entropy_100bins": 0.9650832272289168,
"calibration/batch_entropy_10bins": 0.9864342846502329,
"calibration/batch_entropy_50bins": 0.9783947242346824,
"calibration/batch_uniqueness": 0.953848941281708,
"calibration/buffer_distribution_entropy": 0.9315273864669636,
"calibration/buffer_entropy_100bins": 0.9301545524945812,
"calibration/buffer_entropy_10bins": 0.9315273864669636,
"calibration/buffer_entropy_50bins": 0.9434719750608054,
"calibration/confidence_entropy": 0.5021417689027668,
"calibration/coverage@0%": 0.030512946582513044,
"calibration/coverage@1%": 0.030512946582513044,
"calibration/coverage@10%": 0.34233461774347035,
"calibration/coverage@15%": 0.47939107668565634,
"calibration/coverage@20%": 0.7026149703671164,
"calibration/coverage@25%": 0.8276201440877331,
"calibration/coverage@30%": 0.8976031607885394,
"calibration/coverage@5%": 0.11523160748304304,
"calibration/ece": 0.17367782749954952,
"calibration/mean_confidence": 0.5388179797320996,
"calibration/prompt_uniqueness": 0.8750848836612375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008333333333333326,
"completions/max_length": 2917.4,
"completions/max_terminated_length": 2917.4,
"completions/mean_length": 626.88291015625,
"completions/mean_terminated_length": 632.1784790039062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 188.8,
"epoch": 0.23999700003749952,
"grad_norm": 0.0003905615594703704,
"learning_rate": 3.2530120481927713e-06,
"loss": -0.0055,
"num_tokens": 194551649.0,
"reward": 0.9936325430870057,
"reward_std": 0.12192367911338806,
"rewards/accuracy_reward": 0.6775173544883728,
"rewards/brier_reward": 0.7730206847190857,
"rewards/confidence_uniqueness_reward": 0.9442116141319274,
"rewards/format_reward": 0.9916666746139526,
"rewards/frontier_aurc_reward": -0.0014509693486616016,
"rewards/frontier_coverage_0": -8.213166147470474e-05,
"rewards/frontier_coverage_1": -8.213166147470474e-05,
"rewards/frontier_coverage_10": -8.213166147470474e-05,
"rewards/frontier_coverage_15": -8.213166147470474e-05,
"rewards/frontier_coverage_20": -8.213166147470474e-05,
"rewards/frontier_coverage_25": -8.213166147470474e-05,
"rewards/frontier_coverage_5": -8.213166147470474e-05,
"rewards/frontier_ece_reward": 0.01913320329040289,
"rewards/frontier_entropy_batch_reward": -0.14570734947919844,
"signal/accuracy_reward/centered_abs_mean": 0.16055229902267457,
"signal/accuracy_reward/group_bin_occupancy": 0.19930555555555557,
"signal/accuracy_reward/group_std_mean": 0.21070023775100707,
"signal/accuracy_reward/group_zero_std_frac": 0.4055555582046509,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08027614951133728,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08027614951133728,
"signal/advantage_abs_mean": 0.090542633831501,
"signal/advantage_pre_scale_abs_mean": 0.090542633831501,
"signal/advantage_pre_scale_std": 0.14465495347976684,
"signal/advantage_std": 0.14465495347976684,
"signal/brier_reward/centered_abs_mean": 0.16810146272182463,
"signal/brier_reward/group_bin_occupancy": 0.8604166666666666,
"signal/brier_reward/group_std_mean": 0.21120634078979492,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016810146719217302,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016810146719217302,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024970437213778496,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8760416666666666,
"signal/confidence_uniqueness_reward/group_std_mean": 0.040504425019025805,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024970436468720438,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024970436468720438,
"signal/format_reward/centered_abs_mean": 0.01360677070915699,
"signal/format_reward/group_bin_occupancy": 0.13923611111111112,
"signal/format_reward/group_std_mean": 0.026751523464918138,
"signal/format_reward/group_zero_std_frac": 0.8861111164093017,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006803385354578495,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006803385354578495,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001497122971341014,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.679513888888889,
"signal/frontier_aurc_reward/group_std_mean": 0.0024236575700342655,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.871403837867547e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.871403837867547e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.22072286307811737,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8427083333333332,
"signal/frontier_coverage_0/group_std_mean": 0.28572168946266174,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027590358164161443,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027590358164161443,
"signal/frontier_coverage_1/centered_abs_mean": 0.22072286307811737,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8427083333333332,
"signal/frontier_coverage_1/group_std_mean": 0.28572168946266174,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027590358164161443,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027590358164161443,
"signal/frontier_coverage_10/centered_abs_mean": 0.22072286307811737,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8427083333333332,
"signal/frontier_coverage_10/group_std_mean": 0.28572168946266174,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027590358164161443,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027590358164161443,
"signal/frontier_coverage_15/centered_abs_mean": 0.22072286307811737,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8427083333333332,
"signal/frontier_coverage_15/group_std_mean": 0.28572168946266174,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027590358164161443,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027590358164161443,
"signal/frontier_coverage_20/centered_abs_mean": 0.22072286307811737,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8427083333333332,
"signal/frontier_coverage_20/group_std_mean": 0.28572168946266174,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027590358164161443,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027590358164161443,
"signal/frontier_coverage_25/centered_abs_mean": 0.22072286307811737,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8427083333333332,
"signal/frontier_coverage_25/group_std_mean": 0.28572168946266174,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027590358164161443,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027590358164161443,
"signal/frontier_coverage_5/centered_abs_mean": 0.22072286307811737,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8427083333333332,
"signal/frontier_coverage_5/group_std_mean": 0.28572168946266174,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027590358164161443,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027590358164161443,
"signal/frontier_ece_reward/centered_abs_mean": 0.04990529865026474,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7184027777777777,
"signal/frontier_ece_reward/group_std_mean": 0.06357247680425644,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004990530014038086,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004990530014038086,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22153306305408477,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7583333333333333,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2920175909996033,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022153307124972342,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022153307124972342,
"step": 100
},
{
"epoch": 0.23999700003749952,
"eval_calibration/aurc": 0.1696630845347579,
"eval_calibration/batch_distribution_entropy": 0.9338246735522308,
"eval_calibration/batch_entropy_100bins": 0.706892459061427,
"eval_calibration/batch_entropy_10bins": 0.9338246735522308,
"eval_calibration/batch_entropy_50bins": 0.7818401869433624,
"eval_calibration/batch_uniqueness": 0.8962181321540061,
"eval_calibration/buffer_distribution_entropy": 0.9351777755636745,
"eval_calibration/buffer_entropy_100bins": 0.9338170230335757,
"eval_calibration/buffer_entropy_10bins": 0.9351777755636745,
"eval_calibration/buffer_entropy_50bins": 0.9465654691196751,
"eval_calibration/confidence_entropy": 0.47687795562464963,
"eval_calibration/coverage@0%": 0.21908602150537634,
"eval_calibration/coverage@1%": 0.21908602150537634,
"eval_calibration/coverage@10%": 0.401377688172043,
"eval_calibration/coverage@15%": 0.4847110215053763,
"eval_calibration/coverage@20%": 0.6584341397849462,
"eval_calibration/coverage@25%": 0.8929771505376344,
"eval_calibration/coverage@30%": 0.946236559139785,
"eval_calibration/coverage@5%": 0.276377688172043,
"eval_calibration/ece": 0.23803624165826145,
"eval_calibration/mean_confidence": 0.569122975052372,
"eval_calibration/prompt_uniqueness": 0.8962181321540061,
"eval_completions/clipped_ratio": 0.010416666666666666,
"eval_completions/max_length": 2299.3333333333335,
"eval_completions/max_terminated_length": 2299.3333333333335,
"eval_completions/mean_length": 617.4854431152344,
"eval_completions/mean_terminated_length": 624.0654602050781,
"eval_completions/min_length": 45.333333333333336,
"eval_completions/min_terminated_length": 213.66666666666666,
"eval_loss": 0.0,
"eval_num_tokens": 194551649.0,
"eval_reward": 0.9322001536687216,
"eval_reward_std": 0.23809615274270376,
"eval_rewards/accuracy_reward": 0.6710069477558136,
"eval_rewards/brier_reward": 0.7738438149293264,
"eval_rewards/confidence_uniqueness_reward": 0.8830358386039734,
"eval_rewards/format_reward": 0.9869791766007742,
"eval_rewards/frontier_aurc_reward": -0.0014650731851967673,
"eval_rewards/frontier_coverage_0": 0.00044721147666374844,
"eval_rewards/frontier_coverage_1": 0.00044721147666374844,
"eval_rewards/frontier_coverage_10": 0.00044721147666374844,
"eval_rewards/frontier_coverage_15": 0.00044721147666374844,
"eval_rewards/frontier_coverage_20": 0.00044721147666374844,
"eval_rewards/frontier_coverage_25": 0.00044721147666374844,
"eval_rewards/frontier_coverage_5": 0.00044721147666374844,
"eval_rewards/frontier_ece_reward": 0.01968886749818921,
"eval_rewards/frontier_entropy_batch_reward": -0.6447059710820516,
"eval_runtime": 205.8691,
"eval_samples_per_second": 4.857,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4316948801279068,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.4710538685321808,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2158474400639534,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2158474400639534,
"eval_signal/advantage_abs_mean": 0.20248722285032272,
"eval_signal/advantage_pre_scale_abs_mean": 0.20248722285032272,
"eval_signal/advantage_pre_scale_std": 0.23723148057858148,
"eval_signal/advantage_std": 0.23723148057858148,
"eval_signal/brier_reward/centered_abs_mean": 0.20928792655467987,
"eval_signal/brier_reward/group_bin_occupancy": 0.8854166666666666,
"eval_signal/brier_reward/group_std_mean": 0.2662544945875804,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020928792965908844,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.020928792965908844,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.054847310607632004,
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.40625,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09344139198462169,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005484731014197071,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005484731014197071,
"eval_signal/format_reward/centered_abs_mean": 0.025010850590964157,
"eval_signal/format_reward/group_bin_occupancy": 0.17013888888888887,
"eval_signal/format_reward/group_std_mean": 0.06767813861370087,
"eval_signal/format_reward/group_zero_std_frac": 0.6388889104127884,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.012505425295482079,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.012505425295482079,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0020411182777024806,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6631944444444445,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0037244935131942234,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.551397907761081e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.551397907761081e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.2758402054508527,
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_0/group_std_mean": 0.39653781056404114,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00344800246724238,
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00344800246724238,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.2758402054508527,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_1/group_std_mean": 0.39653781056404114,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00344800246724238,
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00344800246724238,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.2758402054508527,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_10/group_std_mean": 0.39653781056404114,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00344800246724238,
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00344800246724238,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.2758402054508527,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_15/group_std_mean": 0.39653781056404114,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00344800246724238,
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00344800246724238,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.2758402054508527,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_20/group_std_mean": 0.39653781056404114,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00344800246724238,
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00344800246724238,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2758402054508527,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_25/group_std_mean": 0.39653781056404114,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00344800246724238,
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00344800246724238,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2758402054508527,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_5/group_std_mean": 0.39653781056404114,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00344800246724238,
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00344800246724238,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.05408057694633802,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9097222222222223,
"eval_signal/frontier_ece_reward/group_std_mean": 0.0697250347584486,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005408057787766059,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005408057787766059,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3219749679168065,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2951388888888889,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.33611299594243366,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032197498405973114,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032197498405973114,
"eval_steps_per_second": 0.029,
"step": 100
},
{
"calibration/aurc": 0.28984463916978553,
"calibration/batch_distribution_entropy": 0.9711874364190758,
"calibration/batch_entropy_100bins": 0.9559796276599972,
"calibration/batch_entropy_10bins": 0.9711874364190758,
"calibration/batch_entropy_50bins": 0.967740156453336,
"calibration/batch_uniqueness": 0.9505796993365717,
"calibration/buffer_distribution_entropy": 0.936615480617325,
"calibration/buffer_entropy_100bins": 0.9357953238378386,
"calibration/buffer_entropy_10bins": 0.936615480617325,
"calibration/buffer_entropy_50bins": 0.9480214843525481,
"calibration/confidence_entropy": 0.5001631137185123,
"calibration/coverage@0%": 0.01958675450744108,
"calibration/coverage@1%": 0.01958675450744108,
"calibration/coverage@10%": 0.16792198551812815,
"calibration/coverage@15%": 0.18639011882766576,
"calibration/coverage@20%": 0.2856551133219956,
"calibration/coverage@25%": 0.3307244292133193,
"calibration/coverage@30%": 0.48409656797179723,
"calibration/coverage@5%": 0.1408980940299875,
"calibration/ece": 0.14950855344866051,
"calibration/mean_confidence": 0.5798324340804183,
"calibration/prompt_uniqueness": 0.8621616643105219,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010416666666666675,
"completions/max_length": 3329.6,
"completions/max_terminated_length": 3329.6,
"completions/mean_length": 630.7128662109375,
"completions/mean_terminated_length": 637.3713989257812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 172.6,
"epoch": 0.2519968500393745,
"grad_norm": 0.0003931356477551162,
"learning_rate": 3.1024096385542172e-06,
"loss": -0.0083,
"num_tokens": 204894325.0,
"reward": 0.9878118515014649,
"reward_std": 0.12439936995506287,
"rewards/accuracy_reward": 0.6724826335906983,
"rewards/brier_reward": 0.7758374571800232,
"rewards/confidence_uniqueness_reward": 0.9409982562065125,
"rewards/format_reward": 0.9895833373069763,
"rewards/frontier_aurc_reward": -0.0015477648237720131,
"rewards/frontier_coverage_0": -0.00018841465935111047,
"rewards/frontier_coverage_1": -0.00018841465935111047,
"rewards/frontier_coverage_10": -0.00018841465935111047,
"rewards/frontier_coverage_15": -0.00018841465935111047,
"rewards/frontier_coverage_20": -0.00018841465935111047,
"rewards/frontier_coverage_25": -0.00018841465935111047,
"rewards/frontier_coverage_5": -0.00018841465935111047,
"rewards/frontier_ece_reward": 0.017811648175120355,
"rewards/frontier_entropy_batch_reward": -0.16650085747241974,
"signal/accuracy_reward/centered_abs_mean": 0.1506781682372093,
"signal/accuracy_reward/group_bin_occupancy": 0.19965277777777776,
"signal/accuracy_reward/group_std_mean": 0.20425570011138916,
"signal/accuracy_reward/group_zero_std_frac": 0.4027777910232544,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07533908411860465,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07533908411860465,
"signal/advantage_abs_mean": 0.09059339612722397,
"signal/advantage_pre_scale_abs_mean": 0.09059339612722397,
"signal/advantage_pre_scale_std": 0.14705823063850404,
"signal/advantage_std": 0.14705823063850404,
"signal/brier_reward/centered_abs_mean": 0.1585765987634659,
"signal/brier_reward/group_bin_occupancy": 0.85,
"signal/brier_reward/group_std_mean": 0.20116137266159057,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015857660584151743,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015857660584151743,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028387091308832168,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8545138888888889,
"signal/confidence_uniqueness_reward/group_std_mean": 0.046965491771697995,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002838709158822894,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002838709158822894,
"signal/format_reward/centered_abs_mean": 0.016764323227107525,
"signal/format_reward/group_bin_occupancy": 0.14270833333333335,
"signal/format_reward/group_std_mean": 0.03298989050090313,
"signal/format_reward/group_zero_std_frac": 0.8583333373069764,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008382161613553762,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008382161613553762,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001613885280676186,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6847222222222223,
"signal/frontier_aurc_reward/group_std_mean": 0.0026253133080899717,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.017356746364385e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.017356746364385e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19782112836837767,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8472222222222221,
"signal/frontier_coverage_0/group_std_mean": 0.2575752854347229,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024727642070502044,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024727642070502044,
"signal/frontier_coverage_1/centered_abs_mean": 0.19782112836837767,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8472222222222221,
"signal/frontier_coverage_1/group_std_mean": 0.2575752854347229,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024727642070502044,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024727642070502044,
"signal/frontier_coverage_10/centered_abs_mean": 0.19782112836837767,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8472222222222221,
"signal/frontier_coverage_10/group_std_mean": 0.2575752854347229,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024727642070502044,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024727642070502044,
"signal/frontier_coverage_15/centered_abs_mean": 0.19782112836837767,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8472222222222221,
"signal/frontier_coverage_15/group_std_mean": 0.2575752854347229,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024727642070502044,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024727642070502044,
"signal/frontier_coverage_20/centered_abs_mean": 0.19782112836837767,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8472222222222221,
"signal/frontier_coverage_20/group_std_mean": 0.2575752854347229,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024727642070502044,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024727642070502044,
"signal/frontier_coverage_25/centered_abs_mean": 0.19782112836837767,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8472222222222221,
"signal/frontier_coverage_25/group_std_mean": 0.2575752854347229,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024727642070502044,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024727642070502044,
"signal/frontier_coverage_5/centered_abs_mean": 0.19782112836837767,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8472222222222221,
"signal/frontier_coverage_5/group_std_mean": 0.2575752854347229,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024727642070502044,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024727642070502044,
"signal/frontier_ece_reward/centered_abs_mean": 0.04588953480124473,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7059027777777779,
"signal/frontier_ece_reward/group_std_mean": 0.058351149410009386,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004588953498750925,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004588953498750925,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2355831891298294,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7725694444444444,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3069721281528473,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023558317869901656,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023558317869901656,
"step": 105
},
{
"calibration/aurc": 0.16567868451536366,
"calibration/batch_distribution_entropy": 0.9527322950855959,
"calibration/batch_entropy_100bins": 0.947479658399056,
"calibration/batch_entropy_10bins": 0.9527322950855959,
"calibration/batch_entropy_50bins": 0.9567347662661371,
"calibration/batch_uniqueness": 0.9478300016023,
"calibration/buffer_distribution_entropy": 0.9392886191298354,
"calibration/buffer_entropy_100bins": 0.9397206217856423,
"calibration/buffer_entropy_10bins": 0.9392886191298354,
"calibration/buffer_entropy_50bins": 0.9508547762112409,
"calibration/confidence_entropy": 0.5005231604018499,
"calibration/coverage@0%": 0.03288614870450616,
"calibration/coverage@1%": 0.03288614870450616,
"calibration/coverage@10%": 0.3226409737961885,
"calibration/coverage@15%": 0.477594869979861,
"calibration/coverage@20%": 0.6694523645087733,
"calibration/coverage@25%": 0.7880677437475745,
"calibration/coverage@30%": 0.9082701187335092,
"calibration/coverage@5%": 0.14515768917448005,
"calibration/ece": 0.13138400877437387,
"calibration/mean_confidence": 0.6028444186351783,
"calibration/prompt_uniqueness": 0.8709859308023828,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010069444444444419,
"completions/max_length": 3546.2,
"completions/max_terminated_length": 3546.2,
"completions/mean_length": 640.7478271484375,
"completions/mean_terminated_length": 647.2959106445312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 192.6,
"epoch": 0.2639967000412495,
"grad_norm": 0.0004377875302452594,
"learning_rate": 2.9518072289156627e-06,
"loss": -0.009,
"num_tokens": 215384188.0,
"reward": 1.001729953289032,
"reward_std": 0.1265808016061783,
"rewards/accuracy_reward": 0.7092881917953491,
"rewards/brier_reward": 0.7818510174751282,
"rewards/confidence_uniqueness_reward": 0.9397328972816468,
"rewards/format_reward": 0.9899305582046509,
"rewards/frontier_aurc_reward": -0.001325283572077751,
"rewards/frontier_coverage_0": -0.02074230033904314,
"rewards/frontier_coverage_1": -0.02074230033904314,
"rewards/frontier_coverage_10": -0.02074230033904314,
"rewards/frontier_coverage_15": -0.02074230033904314,
"rewards/frontier_coverage_20": -0.02074230033904314,
"rewards/frontier_coverage_25": -0.02074230033904314,
"rewards/frontier_coverage_5": -0.02074230033904314,
"rewards/frontier_ece_reward": 0.01406394112855196,
"rewards/frontier_entropy_batch_reward": -0.19612716436386107,
"signal/accuracy_reward/centered_abs_mean": 0.15444336235523223,
"signal/accuracy_reward/group_bin_occupancy": 0.20381944444444441,
"signal/accuracy_reward/group_std_mean": 0.21105689704418182,
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07722168117761612,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07722168117761612,
"signal/advantage_abs_mean": 0.0925620898604393,
"signal/advantage_pre_scale_abs_mean": 0.0925620898604393,
"signal/advantage_pre_scale_std": 0.15025562345981597,
"signal/advantage_std": 0.15025562345981597,
"signal/brier_reward/centered_abs_mean": 0.15158471167087556,
"signal/brier_reward/group_bin_occupancy": 0.8395833333333332,
"signal/brier_reward/group_std_mean": 0.19477559626102448,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015158471278846264,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015158471278846264,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02864648588001728,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8680555555555556,
"signal/confidence_uniqueness_reward/group_std_mean": 0.045315783470869064,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002864648727700114,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002864648727700114,
"signal/format_reward/centered_abs_mean": 0.01662326380610466,
"signal/format_reward/group_bin_occupancy": 0.14097222222222222,
"signal/format_reward/group_std_mean": 0.03081248588860035,
"signal/format_reward/group_zero_std_frac": 0.8722222208976745,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00831163190305233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00831163190305233,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013894882751628757,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.679513888888889,
"signal/frontier_aurc_reward/group_std_mean": 0.00229809598531574,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7368603403156156e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7368603403156156e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1891954332590103,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8295138888888889,
"signal/frontier_coverage_0/group_std_mean": 0.24967995285987854,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002364942990243435,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002364942990243435,
"signal/frontier_coverage_1/centered_abs_mean": 0.1891954332590103,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8295138888888889,
"signal/frontier_coverage_1/group_std_mean": 0.24967995285987854,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002364942990243435,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002364942990243435,
"signal/frontier_coverage_10/centered_abs_mean": 0.1891954332590103,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8295138888888889,
"signal/frontier_coverage_10/group_std_mean": 0.24967995285987854,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002364942990243435,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002364942990243435,
"signal/frontier_coverage_15/centered_abs_mean": 0.1891954332590103,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8295138888888889,
"signal/frontier_coverage_15/group_std_mean": 0.24967995285987854,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002364942990243435,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002364942990243435,
"signal/frontier_coverage_20/centered_abs_mean": 0.1891954332590103,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8295138888888889,
"signal/frontier_coverage_20/group_std_mean": 0.24967995285987854,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002364942990243435,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002364942990243435,
"signal/frontier_coverage_25/centered_abs_mean": 0.1891954332590103,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8295138888888889,
"signal/frontier_coverage_25/group_std_mean": 0.24967995285987854,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002364942990243435,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002364942990243435,
"signal/frontier_coverage_5/centered_abs_mean": 0.1891954332590103,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8295138888888889,
"signal/frontier_coverage_5/group_std_mean": 0.24967995285987854,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002364942990243435,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002364942990243435,
"signal/frontier_ece_reward/centered_abs_mean": 0.041539561748504636,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7034722222222223,
"signal/frontier_ece_reward/group_std_mean": 0.05418416783213616,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0041539563797414305,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0041539563797414305,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2623390406370163,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7722222222222223,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.335136216878891,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026233907043933868,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026233907043933868,
"step": 110
},
{
"calibration/aurc": 0.29473056838071987,
"calibration/batch_distribution_entropy": 0.9619916322012759,
"calibration/batch_entropy_100bins": 0.9549874064799202,
"calibration/batch_entropy_10bins": 0.9619916322012759,
"calibration/batch_entropy_50bins": 0.9647061223514687,
"calibration/batch_uniqueness": 0.9495682126495403,
"calibration/buffer_distribution_entropy": 0.9414297817272864,
"calibration/buffer_entropy_100bins": 0.9430816398527859,
"calibration/buffer_entropy_10bins": 0.9414297817272864,
"calibration/buffer_entropy_50bins": 0.9532562123434823,
"calibration/confidence_entropy": 0.497030786564154,
"calibration/coverage@0%": 0.013913364638495167,
"calibration/coverage@1%": 0.013913364638495167,
"calibration/coverage@10%": 0.040717969175169524,
"calibration/coverage@15%": 0.11740594901471416,
"calibration/coverage@20%": 0.3688631313189874,
"calibration/coverage@25%": 0.47341993382008873,
"calibration/coverage@30%": 0.5718340073342899,
"calibration/coverage@5%": 0.013913364638495167,
"calibration/ece": 0.18056783838522902,
"calibration/mean_confidence": 0.5710495335219846,
"calibration/prompt_uniqueness": 0.8647219156071875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014930555555555535,
"completions/max_length": 3417.4,
"completions/max_terminated_length": 3417.4,
"completions/mean_length": 639.9868041992188,
"completions/mean_terminated_length": 649.7999755859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 185.6,
"epoch": 0.27599655004312446,
"grad_norm": 0.00040515753789804876,
"learning_rate": 2.8012048192771087e-06,
"loss": -0.0125,
"num_tokens": 225836036.0,
"reward": 0.9746179699897766,
"reward_std": 0.13332333266735077,
"rewards/accuracy_reward": 0.6622395753860474,
"rewards/brier_reward": 0.7698838829994201,
"rewards/confidence_uniqueness_reward": 0.9336004853248596,
"rewards/format_reward": 0.9848090410232544,
"rewards/frontier_aurc_reward": -0.0017310404684394598,
"rewards/frontier_coverage_0": 0.004687186796218157,
"rewards/frontier_coverage_1": 0.004687186796218157,
"rewards/frontier_coverage_10": 0.004687186796218157,
"rewards/frontier_coverage_15": 0.004687186796218157,
"rewards/frontier_coverage_20": 0.004687186796218157,
"rewards/frontier_coverage_25": 0.004687186796218157,
"rewards/frontier_coverage_5": 0.004687186796218157,
"rewards/frontier_ece_reward": 0.016754307225346564,
"rewards/frontier_entropy_batch_reward": -0.213187313079834,
"signal/accuracy_reward/centered_abs_mean": 0.15972764790058136,
"signal/accuracy_reward/group_bin_occupancy": 0.196875,
"signal/accuracy_reward/group_std_mean": 0.206351837515831,
"signal/accuracy_reward/group_zero_std_frac": 0.4250000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07986382395029068,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07986382395029068,
"signal/advantage_abs_mean": 0.09935903698205947,
"signal/advantage_pre_scale_abs_mean": 0.09935903698205947,
"signal/advantage_pre_scale_std": 0.1625169038772583,
"signal/advantage_std": 0.1625169038772583,
"signal/brier_reward/centered_abs_mean": 0.16147418916225434,
"signal/brier_reward/group_bin_occupancy": 0.8649305555555555,
"signal/brier_reward/group_std_mean": 0.20425305664539337,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016147419437766077,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016147419437766077,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.036166596412658694,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8284722222222222,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05944165885448456,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003616659576073289,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003616659576073289,
"signal/format_reward/centered_abs_mean": 0.02408311627805233,
"signal/format_reward/group_bin_occupancy": 0.1482638888888889,
"signal/format_reward/group_std_mean": 0.04518638737499714,
"signal/format_reward/group_zero_std_frac": 0.8138888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012041558139026165,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012041558139026165,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017531340941786765,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6899305555555555,
"signal/frontier_aurc_reward/group_std_mean": 0.002778572216629982,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1914176249993035e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1914176249993035e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19187724888324736,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8548611111111111,
"signal/frontier_coverage_0/group_std_mean": 0.25314462184906006,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023984656669199466,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023984656669199466,
"signal/frontier_coverage_1/centered_abs_mean": 0.19187724888324736,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8548611111111111,
"signal/frontier_coverage_1/group_std_mean": 0.25314462184906006,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023984656669199466,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023984656669199466,
"signal/frontier_coverage_10/centered_abs_mean": 0.19187724888324736,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8548611111111111,
"signal/frontier_coverage_10/group_std_mean": 0.25314462184906006,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023984656669199466,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023984656669199466,
"signal/frontier_coverage_15/centered_abs_mean": 0.19187724888324736,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8548611111111111,
"signal/frontier_coverage_15/group_std_mean": 0.25314462184906006,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023984656669199466,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023984656669199466,
"signal/frontier_coverage_20/centered_abs_mean": 0.19187724888324736,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8548611111111111,
"signal/frontier_coverage_20/group_std_mean": 0.25314462184906006,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023984656669199466,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023984656669199466,
"signal/frontier_coverage_25/centered_abs_mean": 0.19187724888324736,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8548611111111111,
"signal/frontier_coverage_25/group_std_mean": 0.25314462184906006,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023984656669199466,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023984656669199466,
"signal/frontier_coverage_5/centered_abs_mean": 0.19187724888324736,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8548611111111111,
"signal/frontier_coverage_5/group_std_mean": 0.25314462184906006,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023984656669199466,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023984656669199466,
"signal/frontier_ece_reward/centered_abs_mean": 0.04284475669264794,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6947916666666667,
"signal/frontier_ece_reward/group_std_mean": 0.054882925003767014,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004284475743770599,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004284475743770599,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2705032885074615,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7972222222222222,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33811612129211427,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027050328627228736,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027050328627228736,
"step": 115
},
{
"calibration/aurc": 0.2692657849205195,
"calibration/batch_distribution_entropy": 0.9659327417988296,
"calibration/batch_entropy_100bins": 0.9541108697407532,
"calibration/batch_entropy_10bins": 0.9659327417988296,
"calibration/batch_entropy_50bins": 0.9666704637716524,
"calibration/batch_uniqueness": 0.9494544866872708,
"calibration/buffer_distribution_entropy": 0.9450548596655558,
"calibration/buffer_entropy_100bins": 0.946823957265584,
"calibration/buffer_entropy_10bins": 0.9450548596655558,
"calibration/buffer_entropy_50bins": 0.9562487948463927,
"calibration/confidence_entropy": 0.4875023680573502,
"calibration/coverage@0%": 0.017792988425402438,
"calibration/coverage@1%": 0.017792988425402438,
"calibration/coverage@10%": 0.2124956955037931,
"calibration/coverage@15%": 0.4193426391829937,
"calibration/coverage@20%": 0.5009450510714555,
"calibration/coverage@25%": 0.5343342250707565,
"calibration/coverage@30%": 0.566149934383202,
"calibration/coverage@5%": 0.05855147063862448,
"calibration/ece": 0.17746895129703658,
"calibration/mean_confidence": 0.5736739808286649,
"calibration/prompt_uniqueness": 0.865164092609454,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012326388888888906,
"completions/max_length": 3293.2,
"completions/max_terminated_length": 3293.2,
"completions/mean_length": 630.0300415039062,
"completions/mean_terminated_length": 637.8402709960938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 203.6,
"epoch": 0.28799640004499943,
"grad_norm": 0.0003517817531246692,
"learning_rate": 2.6506024096385547e-06,
"loss": -0.0098,
"num_tokens": 236175838.0,
"reward": 0.9898527383804321,
"reward_std": 0.1258085072040558,
"rewards/accuracy_reward": 0.6803819417953492,
"rewards/brier_reward": 0.7743050813674927,
"rewards/confidence_uniqueness_reward": 0.9388420104980468,
"rewards/format_reward": 0.9875868082046508,
"rewards/frontier_aurc_reward": -0.00158976421225816,
"rewards/frontier_coverage_0": -0.0026821551844477655,
"rewards/frontier_coverage_1": -0.0026821551844477655,
"rewards/frontier_coverage_10": -0.0026821551844477655,
"rewards/frontier_coverage_15": -0.0026821551844477655,
"rewards/frontier_coverage_20": -0.0026821551844477655,
"rewards/frontier_coverage_25": -0.0026821551844477655,
"rewards/frontier_coverage_5": -0.0026821551844477655,
"rewards/frontier_ece_reward": 0.015108131617307664,
"rewards/frontier_entropy_batch_reward": -0.16702641248703004,
"signal/accuracy_reward/centered_abs_mean": 0.15770399272441865,
"signal/accuracy_reward/group_bin_occupancy": 0.196875,
"signal/accuracy_reward/group_std_mean": 0.2057257741689682,
"signal/accuracy_reward/group_zero_std_frac": 0.425,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07885199636220933,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07885199636220933,
"signal/advantage_abs_mean": 0.09386955499649048,
"signal/advantage_pre_scale_abs_mean": 0.09386955499649048,
"signal/advantage_pre_scale_std": 0.1530741721391678,
"signal/advantage_std": 0.1530741721391678,
"signal/brier_reward/centered_abs_mean": 0.15571836829185487,
"signal/brier_reward/group_bin_occupancy": 0.8451388888888889,
"signal/brier_reward/group_std_mean": 0.19793447852134705,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015571837686002255,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015571837686002255,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03023452088236809,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.85,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04891353026032448,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030234521254897118,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030234521254897118,
"signal/format_reward/centered_abs_mean": 0.018983289785683154,
"signal/format_reward/group_bin_occupancy": 0.14340277777777777,
"signal/format_reward/group_std_mean": 0.03533447273075581,
"signal/format_reward/group_zero_std_frac": 0.8527777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009491644892841577,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009491644892841577,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016145243542268872,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7020833333333334,
"signal/frontier_aurc_reward/group_std_mean": 0.0025344877038151028,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0181555009912698e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0181555009912698e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19334494173526764,
"signal/frontier_coverage_0/group_bin_occupancy": 0.842013888888889,
"signal/frontier_coverage_0/group_std_mean": 0.253934046626091,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002416811836883426,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002416811836883426,
"signal/frontier_coverage_1/centered_abs_mean": 0.19334494173526764,
"signal/frontier_coverage_1/group_bin_occupancy": 0.842013888888889,
"signal/frontier_coverage_1/group_std_mean": 0.253934046626091,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002416811836883426,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002416811836883426,
"signal/frontier_coverage_10/centered_abs_mean": 0.19334494173526764,
"signal/frontier_coverage_10/group_bin_occupancy": 0.842013888888889,
"signal/frontier_coverage_10/group_std_mean": 0.253934046626091,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002416811836883426,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002416811836883426,
"signal/frontier_coverage_15/centered_abs_mean": 0.19334494173526764,
"signal/frontier_coverage_15/group_bin_occupancy": 0.842013888888889,
"signal/frontier_coverage_15/group_std_mean": 0.253934046626091,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002416811836883426,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002416811836883426,
"signal/frontier_coverage_20/centered_abs_mean": 0.19334494173526764,
"signal/frontier_coverage_20/group_bin_occupancy": 0.842013888888889,
"signal/frontier_coverage_20/group_std_mean": 0.253934046626091,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002416811836883426,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002416811836883426,
"signal/frontier_coverage_25/centered_abs_mean": 0.19334494173526764,
"signal/frontier_coverage_25/group_bin_occupancy": 0.842013888888889,
"signal/frontier_coverage_25/group_std_mean": 0.253934046626091,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002416811836883426,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002416811836883426,
"signal/frontier_coverage_5/centered_abs_mean": 0.19334494173526764,
"signal/frontier_coverage_5/group_bin_occupancy": 0.842013888888889,
"signal/frontier_coverage_5/group_std_mean": 0.253934046626091,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002416811836883426,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002416811836883426,
"signal/frontier_ece_reward/centered_abs_mean": 0.041843322664499284,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6902777777777777,
"signal/frontier_ece_reward/group_std_mean": 0.05341664999723435,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004184332210570574,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004184332210570574,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22915047109127046,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7739583333333333,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.29721260666847227,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02291504740715027,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02291504740715027,
"step": 120
},
{
"calibration/aurc": 0.19007498936195838,
"calibration/batch_distribution_entropy": 0.9497641139330535,
"calibration/batch_entropy_100bins": 0.9439166973372888,
"calibration/batch_entropy_10bins": 0.9497641139330535,
"calibration/batch_entropy_50bins": 0.9538831122680052,
"calibration/batch_uniqueness": 0.9468067326996552,
"calibration/buffer_distribution_entropy": 0.94693078276862,
"calibration/buffer_entropy_100bins": 0.9496853046137248,
"calibration/buffer_entropy_10bins": 0.94693078276862,
"calibration/buffer_entropy_50bins": 0.9582811996332786,
"calibration/confidence_entropy": 0.4982449646222564,
"calibration/coverage@0%": 0.0068008289703315895,
"calibration/coverage@1%": 0.0068008289703315895,
"calibration/coverage@10%": 0.3090450479930192,
"calibration/coverage@15%": 0.3981652644399026,
"calibration/coverage@20%": 0.5950416503253889,
"calibration/coverage@25%": 0.7859338837229998,
"calibration/coverage@30%": 0.8696369763562011,
"calibration/coverage@5%": 0.16477966841186736,
"calibration/ece": 0.13885856134762334,
"calibration/mean_confidence": 0.6056185988614311,
"calibration/prompt_uniqueness": 0.860661265331734,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00980902777777779,
"completions/max_length": 3500.2,
"completions/max_terminated_length": 3500.2,
"completions/mean_length": 624.0658935546875,
"completions/mean_terminated_length": 630.3100341796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 171.0,
"epoch": 0.2999962500468744,
"grad_norm": 0.00044128746958449483,
"learning_rate": 2.5e-06,
"loss": -0.0094,
"num_tokens": 246482741.0,
"reward": 0.9921006441116333,
"reward_std": 0.12561330199241638,
"rewards/accuracy_reward": 0.6873263835906982,
"rewards/brier_reward": 0.7992384910583497,
"rewards/confidence_uniqueness_reward": 0.9376503348350524,
"rewards/format_reward": 0.9897569417953491,
"rewards/frontier_aurc_reward": -0.001279058470390737,
"rewards/frontier_coverage_0": 0.006591923534870148,
"rewards/frontier_coverage_1": 0.006591923534870148,
"rewards/frontier_coverage_10": 0.006591923534870148,
"rewards/frontier_coverage_15": 0.006591923534870148,
"rewards/frontier_coverage_20": 0.006591923534870148,
"rewards/frontier_coverage_25": 0.006591923534870148,
"rewards/frontier_coverage_5": 0.006591923534870148,
"rewards/frontier_ece_reward": 0.01775702629238367,
"rewards/frontier_entropy_batch_reward": -0.22466442584991456,
"signal/accuracy_reward/centered_abs_mean": 0.15937500298023224,
"signal/accuracy_reward/group_bin_occupancy": 0.19687499999999997,
"signal/accuracy_reward/group_std_mean": 0.20706891417503356,
"signal/accuracy_reward/group_zero_std_frac": 0.42499999403953553,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07968750149011612,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07968750149011612,
"signal/advantage_abs_mean": 0.09454565495252609,
"signal/advantage_pre_scale_abs_mean": 0.09454565495252609,
"signal/advantage_pre_scale_std": 0.15264492034912108,
"signal/advantage_std": 0.15264492034912108,
"signal/brier_reward/centered_abs_mean": 0.1370186984539032,
"signal/brier_reward/group_bin_occupancy": 0.8465277777777779,
"signal/brier_reward/group_std_mean": 0.17555441856384277,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013701869174838066,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013701869174838066,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030255821347236634,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8583333333333334,
"signal/confidence_uniqueness_reward/group_std_mean": 0.047763481736183167,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030255821999162435,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030255821999162435,
"signal/format_reward/centered_abs_mean": 0.01732855923473835,
"signal/format_reward/group_bin_occupancy": 0.14166666666666666,
"signal/format_reward/group_std_mean": 0.03218508400022983,
"signal/format_reward/group_zero_std_frac": 0.8666666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008664279617369175,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008664279617369175,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014055859064683318,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6861111111111111,
"signal/frontier_aurc_reward/group_std_mean": 0.002288359007798135,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.756982401275309e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.756982401275309e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1769299864768982,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8444444444444444,
"signal/frontier_coverage_0/group_std_mean": 0.23164838552474976,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002211624849587679,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002211624849587679,
"signal/frontier_coverage_1/centered_abs_mean": 0.1769299864768982,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8444444444444444,
"signal/frontier_coverage_1/group_std_mean": 0.23164838552474976,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002211624849587679,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002211624849587679,
"signal/frontier_coverage_10/centered_abs_mean": 0.1769299864768982,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8444444444444444,
"signal/frontier_coverage_10/group_std_mean": 0.23164838552474976,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002211624849587679,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002211624849587679,
"signal/frontier_coverage_15/centered_abs_mean": 0.1769299864768982,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8444444444444444,
"signal/frontier_coverage_15/group_std_mean": 0.23164838552474976,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002211624849587679,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002211624849587679,
"signal/frontier_coverage_20/centered_abs_mean": 0.1769299864768982,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8444444444444444,
"signal/frontier_coverage_20/group_std_mean": 0.23164838552474976,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002211624849587679,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002211624849587679,
"signal/frontier_coverage_25/centered_abs_mean": 0.1769299864768982,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8444444444444444,
"signal/frontier_coverage_25/group_std_mean": 0.23164838552474976,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002211624849587679,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002211624849587679,
"signal/frontier_coverage_5/centered_abs_mean": 0.1769299864768982,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8444444444444444,
"signal/frontier_coverage_5/group_std_mean": 0.23164838552474976,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002211624849587679,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002211624849587679,
"signal/frontier_ece_reward/centered_abs_mean": 0.038429119437932965,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6680555555555555,
"signal/frontier_ece_reward/group_std_mean": 0.04894906431436539,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0038429120555520057,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0038429120555520057,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2667219638824463,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.79375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3330686569213867,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02667219564318657,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02667219564318657,
"step": 125
},
{
"calibration/aurc": 0.2570677552976662,
"calibration/batch_distribution_entropy": 0.9707298208948314,
"calibration/batch_entropy_100bins": 0.9563656907393726,
"calibration/batch_entropy_10bins": 0.9707298208948314,
"calibration/batch_entropy_50bins": 0.968625931346003,
"calibration/batch_uniqueness": 0.9509968355199117,
"calibration/buffer_distribution_entropy": 0.9483898179653849,
"calibration/buffer_entropy_100bins": 0.9520036670790271,
"calibration/buffer_entropy_10bins": 0.9483898179653849,
"calibration/buffer_entropy_50bins": 0.9599194290242906,
"calibration/confidence_entropy": 0.5047739615640212,
"calibration/coverage@0%": 0.020457317272494398,
"calibration/coverage@1%": 0.020457317272494398,
"calibration/coverage@10%": 0.08590234345050488,
"calibration/coverage@15%": 0.2708854907636981,
"calibration/coverage@20%": 0.43295266504226493,
"calibration/coverage@25%": 0.5562714025652029,
"calibration/coverage@30%": 0.6542660724253092,
"calibration/coverage@5%": 0.027263599995007482,
"calibration/ece": 0.15055879786288034,
"calibration/mean_confidence": 0.5460301453849368,
"calibration/prompt_uniqueness": 0.8671654591324399,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01692708333333335,
"completions/max_length": 3702.0,
"completions/max_terminated_length": 3702.0,
"completions/mean_length": 650.5223022460938,
"completions/mean_terminated_length": 661.8441528320312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 173.4,
"epoch": 0.3119961000487494,
"grad_norm": 0.0004091080045327544,
"learning_rate": 2.349397590361446e-06,
"loss": -0.0137,
"num_tokens": 257101558.0,
"reward": 0.9774605631828308,
"reward_std": 0.134475240111351,
"rewards/accuracy_reward": 0.6628472208976746,
"rewards/brier_reward": 0.7783628225326538,
"rewards/confidence_uniqueness_reward": 0.9335785031318664,
"rewards/format_reward": 0.9829861044883728,
"rewards/frontier_aurc_reward": -0.0014200884848833083,
"rewards/frontier_coverage_0": 0.006945094745606184,
"rewards/frontier_coverage_1": 0.006945094745606184,
"rewards/frontier_coverage_10": 0.006945094745606184,
"rewards/frontier_coverage_15": 0.006945094745606184,
"rewards/frontier_coverage_20": 0.006945094745606184,
"rewards/frontier_coverage_25": 0.006945094745606184,
"rewards/frontier_coverage_5": 0.006945094745606184,
"rewards/frontier_ece_reward": 0.013097218424081802,
"rewards/frontier_entropy_batch_reward": -0.18549902439117433,
"signal/accuracy_reward/centered_abs_mean": 0.16636284589767455,
"signal/accuracy_reward/group_bin_occupancy": 0.20243055555555559,
"signal/accuracy_reward/group_std_mean": 0.21947809755802156,
"signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08318142294883728,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08318142294883728,
"signal/advantage_abs_mean": 0.10097838938236237,
"signal/advantage_pre_scale_abs_mean": 0.10097838938236237,
"signal/advantage_pre_scale_std": 0.16073089241981506,
"signal/advantage_std": 0.16073089241981506,
"signal/brier_reward/centered_abs_mean": 0.1482792615890503,
"signal/brier_reward/group_bin_occupancy": 0.8479166666666668,
"signal/brier_reward/group_std_mean": 0.1903451293706894,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014827927015721798,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014827927015721798,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03590902425348759,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8461805555555555,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05420064702630043,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035909025464206934,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035909025464206934,
"signal/format_reward/centered_abs_mean": 0.02457682266831398,
"signal/format_reward/group_bin_occupancy": 0.14409722222222224,
"signal/format_reward/group_std_mean": 0.04063734821975231,
"signal/format_reward/group_zero_std_frac": 0.8472222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01228841133415699,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01228841133415699,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014170024311169981,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7017361111111111,
"signal/frontier_aurc_reward/group_std_mean": 0.002285012090578675,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7712531553115694e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7712531553115694e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19146940410137175,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8520833333333334,
"signal/frontier_coverage_0/group_std_mean": 0.25203768312931063,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023933676537126304,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023933676537126304,
"signal/frontier_coverage_1/centered_abs_mean": 0.19146940410137175,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8520833333333334,
"signal/frontier_coverage_1/group_std_mean": 0.25203768312931063,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023933676537126304,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023933676537126304,
"signal/frontier_coverage_10/centered_abs_mean": 0.19146940410137175,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8520833333333334,
"signal/frontier_coverage_10/group_std_mean": 0.25203768312931063,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023933676537126304,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023933676537126304,
"signal/frontier_coverage_15/centered_abs_mean": 0.19146940410137175,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8520833333333334,
"signal/frontier_coverage_15/group_std_mean": 0.25203768312931063,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023933676537126304,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023933676537126304,
"signal/frontier_coverage_20/centered_abs_mean": 0.19146940410137175,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8520833333333334,
"signal/frontier_coverage_20/group_std_mean": 0.25203768312931063,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023933676537126304,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023933676537126304,
"signal/frontier_coverage_25/centered_abs_mean": 0.19146940410137175,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8520833333333334,
"signal/frontier_coverage_25/group_std_mean": 0.25203768312931063,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023933676537126304,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023933676537126304,
"signal/frontier_coverage_5/centered_abs_mean": 0.19146940410137175,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8520833333333334,
"signal/frontier_coverage_5/group_std_mean": 0.25203768312931063,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023933676537126304,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023933676537126304,
"signal/frontier_ece_reward/centered_abs_mean": 0.03642968088388443,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6868055555555557,
"signal/frontier_ece_reward/group_std_mean": 0.04717910811305046,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036429683677852154,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036429683677852154,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24993859529495238,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7871527777777778,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.31895039677619935,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024993859976530076,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024993859976530076,
"step": 130
},
{
"calibration/aurc": 0.23095194989951517,
"calibration/batch_distribution_entropy": 0.9421492482556129,
"calibration/batch_entropy_100bins": 0.9440415676294579,
"calibration/batch_entropy_10bins": 0.9421492482556129,
"calibration/batch_entropy_50bins": 0.9520449592729043,
"calibration/batch_uniqueness": 0.9453759538189441,
"calibration/buffer_distribution_entropy": 0.950428047537104,
"calibration/buffer_entropy_100bins": 0.9543930678343209,
"calibration/buffer_entropy_10bins": 0.950428047537104,
"calibration/buffer_entropy_50bins": 0.9617481873791439,
"calibration/confidence_entropy": 0.47065056430076924,
"calibration/coverage@0%": 0.04189883140091326,
"calibration/coverage@1%": 0.04189883140091326,
"calibration/coverage@10%": 0.3034366940325464,
"calibration/coverage@15%": 0.3935276632602748,
"calibration/coverage@20%": 0.48246784032256607,
"calibration/coverage@25%": 0.5515483076447136,
"calibration/coverage@30%": 0.6815676706121045,
"calibration/coverage@5%": 0.17324618582756293,
"calibration/ece": 0.14784717956359078,
"calibration/mean_confidence": 0.6212378872019609,
"calibration/prompt_uniqueness": 0.8551414614959321,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011805555555555536,
"completions/max_length": 3302.8,
"completions/max_terminated_length": 3302.8,
"completions/mean_length": 626.293505859375,
"completions/mean_terminated_length": 633.8015014648438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 159.4,
"epoch": 0.32399595005062437,
"grad_norm": 0.0004363918851595372,
"learning_rate": 2.1987951807228917e-06,
"loss": -0.0106,
"num_tokens": 267409483.0,
"reward": 0.9906501173973083,
"reward_std": 0.12640073150396347,
"rewards/accuracy_reward": 0.6786458253860473,
"rewards/brier_reward": 0.7910825252532959,
"rewards/confidence_uniqueness_reward": 0.9383931279182434,
"rewards/format_reward": 0.9881076216697693,
"rewards/frontier_aurc_reward": -0.0013120988383889197,
"rewards/frontier_coverage_0": 0.014272965677082538,
"rewards/frontier_coverage_1": 0.014272965677082538,
"rewards/frontier_coverage_10": 0.014272965677082538,
"rewards/frontier_coverage_15": 0.014272965677082538,
"rewards/frontier_coverage_20": 0.014272965677082538,
"rewards/frontier_coverage_25": 0.014272965677082538,
"rewards/frontier_coverage_5": 0.014272965677082538,
"rewards/frontier_ece_reward": 0.016592884063720705,
"rewards/frontier_entropy_batch_reward": -0.18565942943096161,
"signal/accuracy_reward/centered_abs_mean": 0.15636935830116272,
"signal/accuracy_reward/group_bin_occupancy": 0.19722222222222224,
"signal/accuracy_reward/group_std_mean": 0.20537342131137848,
"signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07818467915058136,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07818467915058136,
"signal/advantage_abs_mean": 0.0925526574254036,
"signal/advantage_pre_scale_abs_mean": 0.0925526574254036,
"signal/advantage_pre_scale_std": 0.15250625014305114,
"signal/advantage_std": 0.15250625014305114,
"signal/brier_reward/centered_abs_mean": 0.14853745102882385,
"signal/brier_reward/group_bin_occupancy": 0.8354166666666668,
"signal/brier_reward/group_std_mean": 0.1917984515428543,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014853744953870773,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014853744953870773,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03174131475389004,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8378472222222222,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05302174612879753,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031741314101964234,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031741314101964234,
"signal/format_reward/centered_abs_mean": 0.02053493894636631,
"signal/format_reward/group_bin_occupancy": 0.14583333333333334,
"signal/format_reward/group_std_mean": 0.039642113447189334,
"signal/format_reward/group_zero_std_frac": 0.8333333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010267469473183155,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010267469473183155,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014899963280186057,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6937500000000001,
"signal/frontier_aurc_reward/group_std_mean": 0.002411051280796528,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.862495373643469e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.862495373643469e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19647094905376433,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8375,
"signal/frontier_coverage_0/group_std_mean": 0.25708119869232177,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024558869190514088,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024558869190514088,
"signal/frontier_coverage_1/centered_abs_mean": 0.19647094905376433,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8375,
"signal/frontier_coverage_1/group_std_mean": 0.25708119869232177,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024558869190514088,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024558869190514088,
"signal/frontier_coverage_10/centered_abs_mean": 0.19647094905376433,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8375,
"signal/frontier_coverage_10/group_std_mean": 0.25708119869232177,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024558869190514088,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024558869190514088,
"signal/frontier_coverage_15/centered_abs_mean": 0.19647094905376433,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8375,
"signal/frontier_coverage_15/group_std_mean": 0.25708119869232177,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024558869190514088,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024558869190514088,
"signal/frontier_coverage_20/centered_abs_mean": 0.19647094905376433,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8375,
"signal/frontier_coverage_20/group_std_mean": 0.25708119869232177,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024558869190514088,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024558869190514088,
"signal/frontier_coverage_25/centered_abs_mean": 0.19647094905376433,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8375,
"signal/frontier_coverage_25/group_std_mean": 0.25708119869232177,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024558869190514088,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024558869190514088,
"signal/frontier_coverage_5/centered_abs_mean": 0.19647094905376433,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8375,
"signal/frontier_coverage_5/group_std_mean": 0.25708119869232177,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024558869190514088,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024558869190514088,
"signal/frontier_ece_reward/centered_abs_mean": 0.03865344226360321,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6791666666666667,
"signal/frontier_ece_reward/group_std_mean": 0.04832939356565476,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0038653444964438675,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0038653444964438675,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23874907791614533,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.773611111111111,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3072973072528839,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023874907195568083,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023874907195568083,
"step": 135
},
{
"calibration/aurc": 0.1294049869245013,
"calibration/batch_distribution_entropy": 0.9557180391089094,
"calibration/batch_entropy_100bins": 0.9497484635160689,
"calibration/batch_entropy_10bins": 0.9557180391089094,
"calibration/batch_entropy_50bins": 0.9591875124129359,
"calibration/batch_uniqueness": 0.9468833336742613,
"calibration/buffer_distribution_entropy": 0.9549444448123883,
"calibration/buffer_entropy_100bins": 0.9602670469433393,
"calibration/buffer_entropy_10bins": 0.9549444448123883,
"calibration/buffer_entropy_50bins": 0.9660572803453091,
"calibration/confidence_entropy": 0.4833540273370396,
"calibration/coverage@0%": 0.05841797685887148,
"calibration/coverage@1%": 0.05841797685887148,
"calibration/coverage@10%": 0.48837748487367827,
"calibration/coverage@15%": 0.6872088995406774,
"calibration/coverage@20%": 0.8049094782025777,
"calibration/coverage@25%": 0.9049543537246117,
"calibration/coverage@30%": 0.9723005208262135,
"calibration/coverage@5%": 0.16348598057755775,
"calibration/ece": 0.12621167596611338,
"calibration/mean_confidence": 0.5960379852244188,
"calibration/prompt_uniqueness": 0.8497180192039384,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012586805555555558,
"completions/max_length": 3468.0,
"completions/max_terminated_length": 3468.0,
"completions/mean_length": 624.9230834960938,
"completions/mean_terminated_length": 632.8444091796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 187.2,
"epoch": 0.33599580005249935,
"grad_norm": 0.00038522930117323995,
"learning_rate": 2.0481927710843377e-06,
"loss": -0.0106,
"num_tokens": 277712821.0,
"reward": 0.9888324856758117,
"reward_std": 0.12152263075113297,
"rewards/accuracy_reward": 0.6763020873069763,
"rewards/brier_reward": 0.7869715809822082,
"rewards/confidence_uniqueness_reward": 0.9377409458160401,
"rewards/format_reward": 0.9873264074325562,
"rewards/frontier_aurc_reward": -0.0012948142597451807,
"rewards/frontier_coverage_0": 0.013374109752476215,
"rewards/frontier_coverage_1": 0.013374109752476215,
"rewards/frontier_coverage_10": 0.013374109752476215,
"rewards/frontier_coverage_15": 0.013374109752476215,
"rewards/frontier_coverage_20": 0.013374109752476215,
"rewards/frontier_coverage_25": 0.013374109752476215,
"rewards/frontier_coverage_5": 0.013374109752476215,
"rewards/frontier_ece_reward": 0.013652277737855911,
"rewards/frontier_entropy_batch_reward": -0.17972289621829987,
"signal/accuracy_reward/centered_abs_mean": 0.1446994349360466,
"signal/accuracy_reward/group_bin_occupancy": 0.1954861111111111,
"signal/accuracy_reward/group_std_mean": 0.19354265332221984,
"signal/accuracy_reward/group_zero_std_frac": 0.43611112236976624,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0723497174680233,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0723497174680233,
"signal/advantage_abs_mean": 0.08829654604196549,
"signal/advantage_pre_scale_abs_mean": 0.08829654604196549,
"signal/advantage_pre_scale_std": 0.1477721154689789,
"signal/advantage_std": 0.1477721154689789,
"signal/brier_reward/centered_abs_mean": 0.1490771532058716,
"signal/brier_reward/group_bin_occupancy": 0.8548611111111111,
"signal/brier_reward/group_std_mean": 0.1905330777168274,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014907715283334256,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014907715283334256,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031153790652751923,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.829861111111111,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05239210352301597,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031153791584074496,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031153791584074496,
"signal/format_reward/centered_abs_mean": 0.019932725466787815,
"signal/format_reward/group_bin_occupancy": 0.14583333333333331,
"signal/format_reward/group_std_mean": 0.03907729685306549,
"signal/format_reward/group_zero_std_frac": 0.8333333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009966362733393908,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009966362733393908,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014448148664087056,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6923611111111112,
"signal/frontier_aurc_reward/group_std_mean": 0.002320256642997265,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8060186630464158e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8060186630464158e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19429327845573424,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8440972222222222,
"signal/frontier_coverage_0/group_std_mean": 0.2543476581573486,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00242866612970829,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00242866612970829,
"signal/frontier_coverage_1/centered_abs_mean": 0.19429327845573424,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8440972222222222,
"signal/frontier_coverage_1/group_std_mean": 0.2543476581573486,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00242866612970829,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00242866612970829,
"signal/frontier_coverage_10/centered_abs_mean": 0.19429327845573424,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8440972222222222,
"signal/frontier_coverage_10/group_std_mean": 0.2543476581573486,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00242866612970829,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00242866612970829,
"signal/frontier_coverage_15/centered_abs_mean": 0.19429327845573424,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8440972222222222,
"signal/frontier_coverage_15/group_std_mean": 0.2543476581573486,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00242866612970829,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00242866612970829,
"signal/frontier_coverage_20/centered_abs_mean": 0.19429327845573424,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8440972222222222,
"signal/frontier_coverage_20/group_std_mean": 0.2543476581573486,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00242866612970829,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00242866612970829,
"signal/frontier_coverage_25/centered_abs_mean": 0.19429327845573424,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8440972222222222,
"signal/frontier_coverage_25/group_std_mean": 0.2543476581573486,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00242866612970829,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00242866612970829,
"signal/frontier_coverage_5/centered_abs_mean": 0.19429327845573424,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8440972222222222,
"signal/frontier_coverage_5/group_std_mean": 0.2543476581573486,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00242866612970829,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00242866612970829,
"signal/frontier_ece_reward/centered_abs_mean": 0.035150817781686786,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6836805555555556,
"signal/frontier_ece_reward/group_std_mean": 0.04459300860762596,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035150818061083556,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035150818061083556,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23712966442108155,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.779513888888889,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3046766459941864,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023712967336177827,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023712967336177827,
"step": 140
},
{
"calibration/aurc": 0.18683794065072812,
"calibration/batch_distribution_entropy": 0.9806387486177448,
"calibration/batch_entropy_100bins": 0.9596139831547141,
"calibration/batch_entropy_10bins": 0.9806387486177448,
"calibration/batch_entropy_50bins": 0.9722668151489826,
"calibration/batch_uniqueness": 0.9519202784227648,
"calibration/buffer_distribution_entropy": 0.965098347204146,
"calibration/buffer_entropy_100bins": 0.9711153442674696,
"calibration/buffer_entropy_10bins": 0.965098347204146,
"calibration/buffer_entropy_50bins": 0.9745830308057396,
"calibration/confidence_entropy": 0.49438751115496604,
"calibration/coverage@0%": 0.023666604008716417,
"calibration/coverage@1%": 0.023666604008716417,
"calibration/coverage@10%": 0.34799730209956625,
"calibration/coverage@15%": 0.48327203315425693,
"calibration/coverage@20%": 0.6064102691678379,
"calibration/coverage@25%": 0.7282333990057179,
"calibration/coverage@30%": 0.8306165917741957,
"calibration/coverage@5%": 0.05673747015044871,
"calibration/ece": 0.14025309224842797,
"calibration/mean_confidence": 0.5135840354133834,
"calibration/prompt_uniqueness": 0.8570269969432882,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0109375,
"completions/max_length": 3256.0,
"completions/max_terminated_length": 3256.0,
"completions/mean_length": 611.480029296875,
"completions/mean_terminated_length": 618.2994384765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 192.4,
"epoch": 0.34799565005437433,
"grad_norm": 0.0003621011273935437,
"learning_rate": 1.8975903614457832e-06,
"loss": -0.0102,
"num_tokens": 287821679.0,
"reward": 1.004742157459259,
"reward_std": 0.11290163099765778,
"rewards/accuracy_reward": 0.7052951335906983,
"rewards/brier_reward": 0.7879093527793884,
"rewards/confidence_uniqueness_reward": 0.9406887650489807,
"rewards/format_reward": 0.9889756917953492,
"rewards/frontier_aurc_reward": -0.0011246049660257995,
"rewards/frontier_coverage_0": -0.0045726167038083075,
"rewards/frontier_coverage_1": -0.0045726167038083075,
"rewards/frontier_coverage_10": -0.0045726167038083075,
"rewards/frontier_coverage_15": -0.0045726167038083075,
"rewards/frontier_coverage_20": -0.0045726167038083075,
"rewards/frontier_coverage_25": -0.005139388330280781,
"rewards/frontier_coverage_5": -0.0045726167038083075,
"rewards/frontier_ece_reward": 0.008927960135042667,
"rewards/frontier_entropy_batch_reward": -0.1572466716170311,
"signal/accuracy_reward/centered_abs_mean": 0.1355523034930229,
"signal/accuracy_reward/group_bin_occupancy": 0.19305555555555556,
"signal/accuracy_reward/group_std_mean": 0.1837514191865921,
"signal/accuracy_reward/group_zero_std_frac": 0.45555557012557985,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06777615174651146,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06777615174651146,
"signal/advantage_abs_mean": 0.08125371336936951,
"signal/advantage_pre_scale_abs_mean": 0.08125371336936951,
"signal/advantage_pre_scale_std": 0.140377277135849,
"signal/advantage_std": 0.140377277135849,
"signal/brier_reward/centered_abs_mean": 0.14059088230133057,
"signal/brier_reward/group_bin_occupancy": 0.8302083333333334,
"signal/brier_reward/group_std_mean": 0.18201070427894592,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014059088379144668,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014059088379144668,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029994430020451546,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8548611111111111,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04794644489884377,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029994430486112835,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029994430486112835,
"signal/format_reward/centered_abs_mean": 0.01914605051279068,
"signal/format_reward/group_bin_occupancy": 0.14201388888888888,
"signal/format_reward/group_std_mean": 0.03469080775976181,
"signal/format_reward/group_zero_std_frac": 0.8638888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00957302525639534,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00957302525639534,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013646916137076913,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6895833333333333,
"signal/frontier_aurc_reward/group_std_mean": 0.0023204814875498413,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7058645971701482e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7058645971701482e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19198527336120605,
"signal/frontier_coverage_0/group_bin_occupancy": 0.821875,
"signal/frontier_coverage_0/group_std_mean": 0.2514296382665634,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002399816084653139,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002399816084653139,
"signal/frontier_coverage_1/centered_abs_mean": 0.19198527336120605,
"signal/frontier_coverage_1/group_bin_occupancy": 0.821875,
"signal/frontier_coverage_1/group_std_mean": 0.2514296382665634,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002399816084653139,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002399816084653139,
"signal/frontier_coverage_10/centered_abs_mean": 0.19198527336120605,
"signal/frontier_coverage_10/group_bin_occupancy": 0.821875,
"signal/frontier_coverage_10/group_std_mean": 0.2514296382665634,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002399816084653139,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002399816084653139,
"signal/frontier_coverage_15/centered_abs_mean": 0.19198527336120605,
"signal/frontier_coverage_15/group_bin_occupancy": 0.821875,
"signal/frontier_coverage_15/group_std_mean": 0.2514296382665634,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002399816084653139,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002399816084653139,
"signal/frontier_coverage_20/centered_abs_mean": 0.19198527336120605,
"signal/frontier_coverage_20/group_bin_occupancy": 0.821875,
"signal/frontier_coverage_20/group_std_mean": 0.2514296382665634,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002399816084653139,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002399816084653139,
"signal/frontier_coverage_25/centered_abs_mean": 0.19039924442768097,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8211805555555557,
"signal/frontier_coverage_25/group_std_mean": 0.24940116107463836,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002379990741610527,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002379990741610527,
"signal/frontier_coverage_5/centered_abs_mean": 0.19198527336120605,
"signal/frontier_coverage_5/group_bin_occupancy": 0.821875,
"signal/frontier_coverage_5/group_std_mean": 0.2514296382665634,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002399816084653139,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002399816084653139,
"signal/frontier_ece_reward/centered_abs_mean": 0.030152727663517,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6826388888888889,
"signal/frontier_ece_reward/group_std_mean": 0.03811613321304321,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030152729246765374,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030152729246765374,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2264914721250534,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7663194444444444,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2937849909067154,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022649147361516953,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022649147361516953,
"step": 145
},
{
"calibration/aurc": 0.168028061806691,
"calibration/batch_distribution_entropy": 0.9503921341637899,
"calibration/batch_entropy_100bins": 0.9454745464314603,
"calibration/batch_entropy_10bins": 0.9503921341637899,
"calibration/batch_entropy_50bins": 0.9544974811591391,
"calibration/batch_uniqueness": 0.9470963432074356,
"calibration/buffer_distribution_entropy": 0.973697032680677,
"calibration/buffer_entropy_100bins": 0.9799372478415842,
"calibration/buffer_entropy_10bins": 0.973697032680677,
"calibration/buffer_entropy_50bins": 0.9815119596059615,
"calibration/confidence_entropy": 0.46900084214722726,
"calibration/coverage@0%": 0.0677531953645879,
"calibration/coverage@1%": 0.08203890965030221,
"calibration/coverage@10%": 0.40898672177949313,
"calibration/coverage@15%": 0.5754278277850677,
"calibration/coverage@20%": 0.6407174776379441,
"calibration/coverage@25%": 0.6956564603220498,
"calibration/coverage@30%": 0.7747426797957127,
"calibration/coverage@5%": 0.3361056663766333,
"calibration/ece": 0.1727506317152436,
"calibration/mean_confidence": 0.5696691972139503,
"calibration/prompt_uniqueness": 0.8616723242905033,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009548611111111115,
"completions/max_length": 3496.8,
"completions/max_terminated_length": 3496.8,
"completions/mean_length": 664.9171875,
"completions/mean_terminated_length": 671.283251953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 185.4,
"epoch": 0.3599955000562493,
"grad_norm": 0.00037436833372339606,
"learning_rate": 1.7469879518072292e-06,
"loss": -0.0077,
"num_tokens": 298591861.0,
"reward": 0.9980527639389039,
"reward_std": 0.12233888059854507,
"rewards/accuracy_reward": 0.6992187619209289,
"rewards/brier_reward": 0.802030611038208,
"rewards/confidence_uniqueness_reward": 0.9381472826004028,
"rewards/format_reward": 0.9902777791023254,
"rewards/frontier_aurc_reward": -0.001526768645271659,
"rewards/frontier_coverage_0": 0.011462044250220061,
"rewards/frontier_coverage_1": 0.011462044250220061,
"rewards/frontier_coverage_10": 0.011462044250220061,
"rewards/frontier_coverage_15": 0.011462044250220061,
"rewards/frontier_coverage_20": 0.01288942052051425,
"rewards/frontier_coverage_25": 0.03849505893886089,
"rewards/frontier_coverage_5": 0.011462044250220061,
"rewards/frontier_ece_reward": 0.007876492012292147,
"rewards/frontier_entropy_batch_reward": -0.22840518951416017,
"signal/accuracy_reward/centered_abs_mean": 0.14632704257965087,
"signal/accuracy_reward/group_bin_occupancy": 0.196875,
"signal/accuracy_reward/group_std_mean": 0.1965962290763855,
"signal/accuracy_reward/group_zero_std_frac": 0.42500001192092896,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07316352128982544,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07316352128982544,
"signal/advantage_abs_mean": 0.08877704441547393,
"signal/advantage_pre_scale_abs_mean": 0.08877704441547393,
"signal/advantage_pre_scale_std": 0.14888640940189363,
"signal/advantage_std": 0.14888640940189363,
"signal/brier_reward/centered_abs_mean": 0.14254556894302367,
"signal/brier_reward/group_bin_occupancy": 0.8364583333333334,
"signal/brier_reward/group_std_mean": 0.18436427116394044,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014254557155072688,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014254557155072688,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029425183311104774,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8427083333333332,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05025056228041649,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002942518377676606,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002942518377676606,
"signal/format_reward/centered_abs_mean": 0.01655815988779068,
"signal/format_reward/group_bin_occupancy": 0.14479166666666668,
"signal/format_reward/group_std_mean": 0.034861961379647255,
"signal/format_reward/group_zero_std_frac": 0.8416666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00827907994389534,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00827907994389534,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00209680434782058,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6986111111111111,
"signal/frontier_aurc_reward/group_std_mean": 0.003620346961542964,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6210054784314707e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6210054784314707e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18070359230041505,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8350694444444444,
"signal/frontier_coverage_0/group_std_mean": 0.23828611075878142,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002258794941008091,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002258794941008091,
"signal/frontier_coverage_1/centered_abs_mean": 0.18070359230041505,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8350694444444444,
"signal/frontier_coverage_1/group_std_mean": 0.23828611075878142,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002258794941008091,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002258794941008091,
"signal/frontier_coverage_10/centered_abs_mean": 0.18070359230041505,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8350694444444444,
"signal/frontier_coverage_10/group_std_mean": 0.23828611075878142,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002258794941008091,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002258794941008091,
"signal/frontier_coverage_15/centered_abs_mean": 0.18070359230041505,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8350694444444444,
"signal/frontier_coverage_15/group_std_mean": 0.23828611075878142,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002258794941008091,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002258794941008091,
"signal/frontier_coverage_20/centered_abs_mean": 0.17324572205543518,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8274305555555556,
"signal/frontier_coverage_20/group_std_mean": 0.22867600619792938,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021655716467648745,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021655716467648745,
"signal/frontier_coverage_25/centered_abs_mean": 0.08664727360010147,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8840277777777779,
"signal/frontier_coverage_25/group_std_mean": 0.1133154422044754,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010830909595824778,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010830909595824778,
"signal/frontier_coverage_5/centered_abs_mean": 0.18070359230041505,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8350694444444444,
"signal/frontier_coverage_5/group_std_mean": 0.23828611075878142,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002258794941008091,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002258794941008091,
"signal/frontier_ece_reward/centered_abs_mean": 0.025348235666751862,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7229166666666668,
"signal/frontier_ece_reward/group_std_mean": 0.032069115340709685,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025348236784338953,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025348236784338953,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2727514892816544,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7552083333333334,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34558807611465453,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027275149524211884,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027275149524211884,
"step": 150
},
{
"epoch": 0.3599955000562493,
"eval_calibration/aurc": 0.14901822974262594,
"eval_calibration/batch_distribution_entropy": 0.9304030834834719,
"eval_calibration/batch_entropy_100bins": 0.6979890618590382,
"eval_calibration/batch_entropy_10bins": 0.9304030834834719,
"eval_calibration/batch_entropy_50bins": 0.7766607817845022,
"eval_calibration/batch_uniqueness": 0.8930084974900759,
"eval_calibration/buffer_distribution_entropy": 0.97765497693518,
"eval_calibration/buffer_entropy_100bins": 0.9841955955582375,
"eval_calibration/buffer_entropy_10bins": 0.97765497693518,
"eval_calibration/buffer_entropy_50bins": 0.9847609370459999,
"eval_calibration/confidence_entropy": 0.4936314506498249,
"eval_calibration/coverage@0%": 0.26278001792114697,
"eval_calibration/coverage@1%": 0.26278001792114697,
"eval_calibration/coverage@10%": 0.45868055555555554,
"eval_calibration/coverage@15%": 0.6025649641577061,
"eval_calibration/coverage@20%": 0.7132840501792114,
"eval_calibration/coverage@25%": 0.9083893369175627,
"eval_calibration/coverage@30%": 0.9567988351254479,
"eval_calibration/coverage@5%": 0.3044466845878136,
"eval_calibration/ece": 0.2226041519685852,
"eval_calibration/mean_confidence": 0.5655246136044548,
"eval_calibration/prompt_uniqueness": 0.8930084974900759,
"eval_completions/clipped_ratio": 0.010416666666666666,
"eval_completions/max_length": 2470.5,
"eval_completions/max_terminated_length": 2470.5,
"eval_completions/mean_length": 634.5142517089844,
"eval_completions/mean_terminated_length": 641.2839864095052,
"eval_completions/min_length": 52.5,
"eval_completions/min_terminated_length": 234.16666666666666,
"eval_loss": 0.0,
"eval_num_tokens": 298591861.0,
"eval_reward": 0.9375461836655935,
"eval_reward_std": 0.23832263300816217,
"eval_rewards/accuracy_reward": 0.6762152711550394,
"eval_rewards/brier_reward": 0.782086193561554,
"eval_rewards/confidence_uniqueness_reward": 0.8867801527182261,
"eval_rewards/format_reward": 0.987847218910853,
"eval_rewards/frontier_aurc_reward": -0.0018945778623068084,
"eval_rewards/frontier_coverage_0": 0.008154223828266064,
"eval_rewards/frontier_coverage_1": 0.008154223828266064,
"eval_rewards/frontier_coverage_10": 0.008154223828266064,
"eval_rewards/frontier_coverage_15": 0.008154223828266064,
"eval_rewards/frontier_coverage_20": 0.013963257893919945,
"eval_rewards/frontier_coverage_25": 0.05141168336073557,
"eval_rewards/frontier_coverage_5": 0.008154223828266064,
"eval_rewards/frontier_ece_reward": 0.004938475166757901,
"eval_rewards/frontier_entropy_batch_reward": -0.6316870252291361,
"eval_runtime": 214.6923,
"eval_samples_per_second": 4.658,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4254014740387599,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.4673873384793599,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21270073701937994,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21270073701937994,
"eval_signal/advantage_abs_mean": 0.20312496026357016,
"eval_signal/advantage_pre_scale_abs_mean": 0.20312496026357016,
"eval_signal/advantage_pre_scale_std": 0.23702458292245865,
"eval_signal/advantage_std": 0.23702458292245865,
"eval_signal/brier_reward/centered_abs_mean": 0.202142134308815,
"eval_signal/brier_reward/group_bin_occupancy": 0.8888888888888888,
"eval_signal/brier_reward/group_std_mean": 0.2583857501546542,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020214214610556763,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.020214214610556763,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05369566256801287,
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3923611111111111,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08944027374188106,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005369566303367416,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005369566303367416,
"eval_signal/format_reward/centered_abs_mean": 0.023328992693374555,
"eval_signal/format_reward/group_bin_occupancy": 0.16666666666666666,
"eval_signal/format_reward/group_std_mean": 0.06276767483601968,
"eval_signal/format_reward/group_zero_std_frac": 0.6666666865348816,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011664496346687278,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.011664496346687278,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0031803955983680985,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.5972222222222222,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.007048736986083289,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9754943524409704e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9754943524409704e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.28077225387096405,
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9374999999999999,
"eval_signal/frontier_coverage_0/group_std_mean": 0.39534174899260205,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0035096531501039863,
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0035096531501039863,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.28077225387096405,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9374999999999999,
"eval_signal/frontier_coverage_1/group_std_mean": 0.39534174899260205,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035096531501039863,
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035096531501039863,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.28077225387096405,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9374999999999999,
"eval_signal/frontier_coverage_10/group_std_mean": 0.39534174899260205,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035096531501039863,
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035096531501039863,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.28077225387096405,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9374999999999999,
"eval_signal/frontier_coverage_15/group_std_mean": 0.39534174899260205,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035096531501039863,
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035096531501039863,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.21713952968517938,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.8993055555555557,
"eval_signal/frontier_coverage_20/group_std_mean": 0.3139382104078929,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027142442607631287,
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027142442607631287,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.08837362627188365,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.954861111111111,
"eval_signal/frontier_coverage_25/group_std_mean": 0.1131880668302377,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011046703827256958,
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011046703827256958,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.28077225387096405,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9374999999999999,
"eval_signal/frontier_coverage_5/group_std_mean": 0.39534174899260205,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035096531501039863,
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035096531501039863,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.03151553000013033,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9375,
"eval_signal/frontier_ece_reward/group_std_mean": 0.042221867789824806,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0031515529456858835,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0031515529456858835,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3152608970801036,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2916666666666667,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.3325229287147522,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03152609150856733,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03152609150856733,
"eval_steps_per_second": 0.028,
"step": 150
},
{
"calibration/aurc": 0.12418416862657974,
"calibration/batch_distribution_entropy": 0.9738042618432547,
"calibration/batch_entropy_100bins": 0.959685408987745,
"calibration/batch_entropy_10bins": 0.9738042618432547,
"calibration/batch_entropy_50bins": 0.9715188105067746,
"calibration/batch_uniqueness": 0.9511053370700557,
"calibration/buffer_distribution_entropy": 0.9796740268447254,
"calibration/buffer_entropy_100bins": 0.9863859099776396,
"calibration/buffer_entropy_10bins": 0.9796740268447254,
"calibration/buffer_entropy_50bins": 0.986449386820567,
"calibration/confidence_entropy": 0.4959676103167527,
"calibration/coverage@0%": 0.05926240634931017,
"calibration/coverage@1%": 0.05926240634931017,
"calibration/coverage@10%": 0.6127707884216798,
"calibration/coverage@15%": 0.7128341997211541,
"calibration/coverage@20%": 0.8080721228110945,
"calibration/coverage@25%": 0.8990786433912484,
"calibration/coverage@30%": 0.9890339425587467,
"calibration/coverage@5%": 0.3137256346668646,
"calibration/ece": 0.21717115584354518,
"calibration/mean_confidence": 0.5605943705821324,
"calibration/prompt_uniqueness": 0.8582839084278222,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009027777777777768,
"completions/max_length": 3711.8,
"completions/max_terminated_length": 3711.8,
"completions/mean_length": 620.5934936523438,
"completions/mean_terminated_length": 626.3040405273438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 187.2,
"epoch": 0.3719953500581243,
"grad_norm": 0.00040308100869879127,
"learning_rate": 1.5963855421686747e-06,
"loss": -0.0072,
"num_tokens": 308848810.0,
"reward": 1.0218815565109254,
"reward_std": 0.12359268218278885,
"rewards/accuracy_reward": 0.7443576335906983,
"rewards/brier_reward": 0.7995624542236328,
"rewards/confidence_uniqueness_reward": 0.9407005667686462,
"rewards/format_reward": 0.9903645873069763,
"rewards/frontier_aurc_reward": -0.0011058273608796298,
"rewards/frontier_coverage_0": -0.020049982517957688,
"rewards/frontier_coverage_1": -0.020049982517957688,
"rewards/frontier_coverage_10": -0.020049982517957688,
"rewards/frontier_coverage_15": -0.020049982517957688,
"rewards/frontier_coverage_20": 0.007909675501286984,
"rewards/frontier_coverage_25": 0.07766608744859696,
"rewards/frontier_coverage_5": -0.020049982517957688,
"rewards/frontier_ece_reward": 0.0013356797680899035,
"rewards/frontier_entropy_batch_reward": -0.19442155659198762,
"signal/accuracy_reward/centered_abs_mean": 0.1566785991191864,
"signal/accuracy_reward/group_bin_occupancy": 0.19791666666666666,
"signal/accuracy_reward/group_std_mean": 0.20641724467277528,
"signal/accuracy_reward/group_zero_std_frac": 0.4166666746139526,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0783392995595932,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0783392995595932,
"signal/advantage_abs_mean": 0.09009798169136048,
"signal/advantage_pre_scale_abs_mean": 0.09009798169136048,
"signal/advantage_pre_scale_std": 0.15116022229194642,
"signal/advantage_std": 0.15116022229194642,
"signal/brier_reward/centered_abs_mean": 0.14188904762268068,
"signal/brier_reward/group_bin_occupancy": 0.8482638888888889,
"signal/brier_reward/group_std_mean": 0.18152420222759247,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014188905246555805,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014188905246555805,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028706640005111694,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8447916666666666,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04965458139777183,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002870664047077298,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002870664047077298,
"signal/format_reward/centered_abs_mean": 0.01727973110973835,
"signal/format_reward/group_bin_occupancy": 0.14513888888888887,
"signal/format_reward/group_std_mean": 0.03610437363386154,
"signal/format_reward/group_zero_std_frac": 0.8388888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008639865554869175,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008639865554869175,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016173893585801125,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7194444444444444,
"signal/frontier_aurc_reward/group_std_mean": 0.002777449763379991,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.021736818278441e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.021736818278441e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19061762392520903,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8347222222222221,
"signal/frontier_coverage_0/group_std_mean": 0.2524797976016998,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002382720448076725,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002382720448076725,
"signal/frontier_coverage_1/centered_abs_mean": 0.19061762392520903,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8347222222222221,
"signal/frontier_coverage_1/group_std_mean": 0.2524797976016998,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002382720448076725,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002382720448076725,
"signal/frontier_coverage_10/centered_abs_mean": 0.19061762392520903,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8347222222222221,
"signal/frontier_coverage_10/group_std_mean": 0.2524797976016998,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002382720448076725,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002382720448076725,
"signal/frontier_coverage_15/centered_abs_mean": 0.19061762392520903,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8347222222222221,
"signal/frontier_coverage_15/group_std_mean": 0.2524797976016998,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002382720448076725,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002382720448076725,
"signal/frontier_coverage_20/centered_abs_mean": 0.10665316879749298,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8378472222222222,
"signal/frontier_coverage_20/group_std_mean": 0.1441801980137825,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013331646099686623,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013331646099686623,
"signal/frontier_coverage_25/centered_abs_mean": 0.07222038358449936,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9309027777777776,
"signal/frontier_coverage_25/group_std_mean": 0.0917926698923111,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009027547785080969,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009027547785080969,
"signal/frontier_coverage_5/centered_abs_mean": 0.19061762392520903,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8347222222222221,
"signal/frontier_coverage_5/group_std_mean": 0.2524797976016998,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002382720448076725,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002382720448076725,
"signal/frontier_ece_reward/centered_abs_mean": 0.023065327480435372,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7246527777777778,
"signal/frontier_ece_reward/group_std_mean": 0.02965252809226513,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023065326735377313,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023065326735377313,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24839998483657838,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7788194444444445,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.31682642698287966,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02484000064432621,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02484000064432621,
"step": 155
},
{
"calibration/aurc": 0.13573148767459628,
"calibration/batch_distribution_entropy": 0.9496131093908463,
"calibration/batch_entropy_100bins": 0.9478886953307164,
"calibration/batch_entropy_10bins": 0.9496131093908463,
"calibration/batch_entropy_50bins": 0.9558848076576348,
"calibration/batch_uniqueness": 0.9477837762673808,
"calibration/buffer_distribution_entropy": 0.9833397575756475,
"calibration/buffer_entropy_100bins": 0.9902063640434747,
"calibration/buffer_entropy_10bins": 0.9833397575756475,
"calibration/buffer_entropy_50bins": 0.9895285524682471,
"calibration/confidence_entropy": 0.5064340839226602,
"calibration/coverage@0%": 0.06199548520452567,
"calibration/coverage@1%": 0.100016318537859,
"calibration/coverage@10%": 0.4863422228857656,
"calibration/coverage@15%": 0.7070152931318631,
"calibration/coverage@20%": 0.8079925379000178,
"calibration/coverage@25%": 0.8801128926701571,
"calibration/coverage@30%": 0.9193717277486911,
"calibration/coverage@5%": 0.3584050596726108,
"calibration/ece": 0.1611499914834087,
"calibration/mean_confidence": 0.6053540170302975,
"calibration/prompt_uniqueness": 0.8627381388571653,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013194444444444443,
"completions/max_length": 3444.2,
"completions/max_terminated_length": 3444.2,
"completions/mean_length": 622.8822998046875,
"completions/mean_terminated_length": 631.2034301757812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 193.8,
"epoch": 0.38399520005999926,
"grad_norm": 0.0003652535378932953,
"learning_rate": 1.4457831325301204e-06,
"loss": -0.0111,
"num_tokens": 319111710.0,
"reward": 0.988642418384552,
"reward_std": 0.1208130583167076,
"rewards/accuracy_reward": 0.6771701335906982,
"rewards/brier_reward": 0.7907926917076111,
"rewards/confidence_uniqueness_reward": 0.9372875452041626,
"rewards/format_reward": 0.9866319298744202,
"rewards/frontier_aurc_reward": -0.0019396688556298613,
"rewards/frontier_coverage_0": 0.009741135686635972,
"rewards/frontier_coverage_1": 0.009741135686635972,
"rewards/frontier_coverage_10": 0.009741135686635972,
"rewards/frontier_coverage_15": 0.01013163048774004,
"rewards/frontier_coverage_20": 0.027610554732382296,
"rewards/frontier_coverage_25": 0.08821047395467758,
"rewards/frontier_coverage_5": 0.009741135686635972,
"rewards/frontier_ece_reward": 0.0028510759511846118,
"rewards/frontier_entropy_batch_reward": -0.18388957977294923,
"signal/accuracy_reward/centered_abs_mean": 0.14061957597732544,
"signal/accuracy_reward/group_bin_occupancy": 0.19201388888888887,
"signal/accuracy_reward/group_std_mean": 0.1869141399860382,
"signal/accuracy_reward/group_zero_std_frac": 0.4638888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07030978798866272,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07030978798866272,
"signal/advantage_abs_mean": 0.08863120973110199,
"signal/advantage_pre_scale_abs_mean": 0.08863120973110199,
"signal/advantage_pre_scale_std": 0.15098720490932466,
"signal/advantage_std": 0.15098720490932466,
"signal/brier_reward/centered_abs_mean": 0.14210671186447144,
"signal/brier_reward/group_bin_occupancy": 0.8583333333333334,
"signal/brier_reward/group_std_mean": 0.18185594975948333,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014210670255124569,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014210670255124569,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.033150676265358926,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.846875,
"signal/confidence_uniqueness_reward/group_std_mean": 0.052501931041479113,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003315067803487182,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003315067803487182,
"signal/format_reward/centered_abs_mean": 0.02215711772441864,
"signal/format_reward/group_bin_occupancy": 0.14409722222222224,
"signal/format_reward/group_std_mean": 0.03936988487839699,
"signal/format_reward/group_zero_std_frac": 0.8472222208976745,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01107855886220932,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01107855886220932,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002279521874152124,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6902777777777778,
"signal/frontier_aurc_reward/group_std_mean": 0.0038658153265714646,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8494023717939852e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8494023717939852e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.17956892549991607,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8472222222222221,
"signal/frontier_coverage_0/group_std_mean": 0.23461733758449554,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002244611643254757,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002244611643254757,
"signal/frontier_coverage_1/centered_abs_mean": 0.17956892549991607,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8472222222222221,
"signal/frontier_coverage_1/group_std_mean": 0.23461733758449554,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002244611643254757,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002244611643254757,
"signal/frontier_coverage_10/centered_abs_mean": 0.17956892549991607,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8472222222222221,
"signal/frontier_coverage_10/group_std_mean": 0.23461733758449554,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002244611643254757,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002244611643254757,
"signal/frontier_coverage_15/centered_abs_mean": 0.17778740525245668,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8458333333333332,
"signal/frontier_coverage_15/group_std_mean": 0.2324183076620102,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00222234264947474,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00222234264947474,
"signal/frontier_coverage_20/centered_abs_mean": 0.06870782449841499,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9017361111111111,
"signal/frontier_coverage_20/group_std_mean": 0.0908221110701561,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008588478667661548,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008588478667661548,
"signal/frontier_coverage_25/centered_abs_mean": 0.0829654261469841,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9145833333333334,
"signal/frontier_coverage_25/group_std_mean": 0.10622318387031555,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010370678268373013,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010370678268373013,
"signal/frontier_coverage_5/centered_abs_mean": 0.17956892549991607,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8472222222222221,
"signal/frontier_coverage_5/group_std_mean": 0.23461733758449554,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002244611643254757,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002244611643254757,
"signal/frontier_ece_reward/centered_abs_mean": 0.02158619686961174,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7381944444444445,
"signal/frontier_ece_reward/group_std_mean": 0.027624867483973505,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021586197894066573,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021586197894066573,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23778702020645143,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7749999999999999,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3028127193450928,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023778701573610304,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023778701573610304,
"step": 160
},
{
"calibration/aurc": 0.15272836679211094,
"calibration/batch_distribution_entropy": 0.9627599417273558,
"calibration/batch_entropy_100bins": 0.9522571385844725,
"calibration/batch_entropy_10bins": 0.9627599417273558,
"calibration/batch_entropy_50bins": 0.9613007206497416,
"calibration/batch_uniqueness": 0.9484824117838434,
"calibration/buffer_distribution_entropy": 0.9855117090133282,
"calibration/buffer_entropy_100bins": 0.9919619031900998,
"calibration/buffer_entropy_10bins": 0.9855117090133282,
"calibration/buffer_entropy_50bins": 0.9911634713514934,
"calibration/confidence_entropy": 0.4857673257774547,
"calibration/coverage@0%": 0.049065118846368376,
"calibration/coverage@1%": 0.07951131307209017,
"calibration/coverage@10%": 0.5366995054961048,
"calibration/coverage@15%": 0.6177436479560819,
"calibration/coverage@20%": 0.6741639757050029,
"calibration/coverage@25%": 0.7182050523398605,
"calibration/coverage@30%": 0.8612321694819425,
"calibration/coverage@5%": 0.28688341146053287,
"calibration/ece": 0.1691076617981678,
"calibration/mean_confidence": 0.5515990495421639,
"calibration/prompt_uniqueness": 0.8613246886758482,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01640625000000002,
"completions/max_length": 3573.0,
"completions/max_terminated_length": 3573.0,
"completions/mean_length": 642.81328125,
"completions/mean_terminated_length": 653.6934326171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 206.4,
"epoch": 0.39599505006187424,
"grad_norm": 0.00042818221845664084,
"learning_rate": 1.2951807228915664e-06,
"loss": -0.012,
"num_tokens": 329655991.0,
"reward": 0.9806491017341614,
"reward_std": 0.12677238285541534,
"rewards/accuracy_reward": 0.6590277671813964,
"rewards/brier_reward": 0.7924768686294555,
"rewards/confidence_uniqueness_reward": 0.9346436381340026,
"rewards/format_reward": 0.983506953716278,
"rewards/frontier_aurc_reward": -0.001722504827193916,
"rewards/frontier_coverage_0": 0.028906658757478,
"rewards/frontier_coverage_1": 0.028906658757478,
"rewards/frontier_coverage_10": 0.028906658757478,
"rewards/frontier_coverage_15": 0.031030337116681038,
"rewards/frontier_coverage_20": 0.0429856464266777,
"rewards/frontier_coverage_25": 0.10405687540769577,
"rewards/frontier_coverage_5": 0.028906658757478,
"rewards/frontier_ece_reward": 0.003673038515262306,
"rewards/frontier_entropy_batch_reward": -0.17347353994846343,
"signal/accuracy_reward/centered_abs_mean": 0.14384765326976776,
"signal/accuracy_reward/group_bin_occupancy": 0.19444444444444445,
"signal/accuracy_reward/group_std_mean": 0.1913081645965576,
"signal/accuracy_reward/group_zero_std_frac": 0.44444444179534914,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07192382663488388,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07192382663488388,
"signal/advantage_abs_mean": 0.09211225062608719,
"signal/advantage_pre_scale_abs_mean": 0.09211225062608719,
"signal/advantage_pre_scale_std": 0.15569303929805756,
"signal/advantage_std": 0.15569303929805756,
"signal/brier_reward/centered_abs_mean": 0.1462089329957962,
"signal/brier_reward/group_bin_occupancy": 0.8246527777777779,
"signal/brier_reward/group_std_mean": 0.18975663781166077,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014620893821120261,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014620893821120261,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03635745905339718,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.820486111111111,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05932655856013298,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003635745914652944,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003635745914652944,
"signal/format_reward/centered_abs_mean": 0.02596571184694767,
"signal/format_reward/group_bin_occupancy": 0.1486111111111111,
"signal/format_reward/group_std_mean": 0.04700228720903397,
"signal/format_reward/group_zero_std_frac": 0.8111111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012982855923473835,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012982855923473835,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002158830175176263,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6850694444444445,
"signal/frontier_aurc_reward/group_std_mean": 0.0039873755071312186,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6985378281096927e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6985378281096927e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19089278280735017,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8347222222222224,
"signal/frontier_coverage_0/group_std_mean": 0.2473309278488159,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023861598689109086,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023861598689109086,
"signal/frontier_coverage_1/centered_abs_mean": 0.19089278280735017,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8347222222222224,
"signal/frontier_coverage_1/group_std_mean": 0.2473309278488159,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023861598689109086,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023861598689109086,
"signal/frontier_coverage_10/centered_abs_mean": 0.19089278280735017,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8347222222222224,
"signal/frontier_coverage_10/group_std_mean": 0.2473309278488159,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023861598689109086,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023861598689109086,
"signal/frontier_coverage_15/centered_abs_mean": 0.1793476462364197,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8326388888888889,
"signal/frontier_coverage_15/group_std_mean": 0.2329329788684845,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002241845661774278,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002241845661774278,
"signal/frontier_coverage_20/centered_abs_mean": 0.06690727174282074,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9097222222222221,
"signal/frontier_coverage_20/group_std_mean": 0.08647293150424958,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008363408851437271,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008363408851437271,
"signal/frontier_coverage_25/centered_abs_mean": 0.0914057046175003,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9003472222222222,
"signal/frontier_coverage_25/group_std_mean": 0.1181455373764038,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011425713310018182,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011425713310018182,
"signal/frontier_coverage_5/centered_abs_mean": 0.19089278280735017,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8347222222222224,
"signal/frontier_coverage_5/group_std_mean": 0.2473309278488159,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023861598689109086,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023861598689109086,
"signal/frontier_ece_reward/centered_abs_mean": 0.022229710966348647,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7388888888888889,
"signal/frontier_ece_reward/group_std_mean": 0.02815890610218048,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022229711525142194,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022229711525142194,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23444273173809052,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.775,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.30144866108894347,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02344427481293678,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02344427481293678,
"step": 165
},
{
"calibration/aurc": 0.1350255119426556,
"calibration/batch_distribution_entropy": 0.9467153136348341,
"calibration/batch_entropy_100bins": 0.9448354422616114,
"calibration/batch_entropy_10bins": 0.9467153136348341,
"calibration/batch_entropy_50bins": 0.9524946276164183,
"calibration/batch_uniqueness": 0.9455724587067573,
"calibration/buffer_distribution_entropy": 0.9863591468776306,
"calibration/buffer_entropy_100bins": 0.9924262206930579,
"calibration/buffer_entropy_10bins": 0.9863591468776306,
"calibration/buffer_entropy_50bins": 0.9916857227941385,
"calibration/confidence_entropy": 0.4943049964540047,
"calibration/coverage@0%": 0.10312881092467825,
"calibration/coverage@1%": 0.10312881092467825,
"calibration/coverage@10%": 0.46204870130918313,
"calibration/coverage@15%": 0.5477847318292854,
"calibration/coverage@20%": 0.7477854206561981,
"calibration/coverage@25%": 0.833632965941813,
"calibration/coverage@30%": 0.9413540434344725,
"calibration/coverage@5%": 0.3250324397385173,
"calibration/ece": 0.13538983337137994,
"calibration/mean_confidence": 0.6091723019901613,
"calibration/prompt_uniqueness": 0.8639295314719903,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011979166666666652,
"completions/max_length": 3396.8,
"completions/max_terminated_length": 3396.8,
"completions/mean_length": 623.0584350585938,
"completions/mean_terminated_length": 630.6281982421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 185.2,
"epoch": 0.4079949000637492,
"grad_norm": 0.00041778094600886106,
"learning_rate": 1.1445783132530121e-06,
"loss": -0.0105,
"num_tokens": 339922808.0,
"reward": 1.0096432447433472,
"reward_std": 0.1250714048743248,
"rewards/accuracy_reward": 0.7217013955116272,
"rewards/brier_reward": 0.8008638501167298,
"rewards/confidence_uniqueness_reward": 0.9373578429222107,
"rewards/format_reward": 0.9880208373069763,
"rewards/frontier_aurc_reward": -0.00128103963797912,
"rewards/frontier_coverage_0": -0.011224100925028324,
"rewards/frontier_coverage_1": -0.011224100925028324,
"rewards/frontier_coverage_10": -0.011125411931425333,
"rewards/frontier_coverage_15": 0.000582283828407526,
"rewards/frontier_coverage_20": 0.05009397864341736,
"rewards/frontier_coverage_25": 0.1337550863623619,
"rewards/frontier_coverage_5": -0.011224100925028324,
"rewards/frontier_ece_reward": -0.0010974591568810865,
"rewards/frontier_entropy_batch_reward": -0.2065970182418823,
"signal/accuracy_reward/centered_abs_mean": 0.14814453125,
"signal/accuracy_reward/group_bin_occupancy": 0.19479166666666667,
"signal/accuracy_reward/group_std_mean": 0.19610781967639923,
"signal/accuracy_reward/group_zero_std_frac": 0.44166667461395265,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.074072265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.074072265625,
"signal/advantage_abs_mean": 0.0921988844871521,
"signal/advantage_pre_scale_abs_mean": 0.0921988844871521,
"signal/advantage_pre_scale_std": 0.15291462242603301,
"signal/advantage_std": 0.15291462242603301,
"signal/brier_reward/centered_abs_mean": 0.14049543142318727,
"signal/brier_reward/group_bin_occupancy": 0.8541666666666666,
"signal/brier_reward/group_std_mean": 0.17984696626663207,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01404954344034195,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01404954344034195,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0325088482350111,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.834375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05347518250346184,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032508848700672386,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032508848700672386,
"signal/format_reward/centered_abs_mean": 0.020540364272892474,
"signal/format_reward/group_bin_occupancy": 0.1454861111111111,
"signal/format_reward/group_std_mean": 0.039224734902381896,
"signal/format_reward/group_zero_std_frac": 0.8361111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010270182136446237,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010270182136446237,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017802180489525199,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6770833333333333,
"signal/frontier_aurc_reward/group_std_mean": 0.003222810197621584,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2252726193983107e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2252726193983107e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1828035831451416,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8583333333333334,
"signal/frontier_coverage_0/group_std_mean": 0.2377503514289856,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022850447334349156,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022850447334349156,
"signal/frontier_coverage_1/centered_abs_mean": 0.1828035831451416,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8583333333333334,
"signal/frontier_coverage_1/group_std_mean": 0.2377503514289856,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022850447334349156,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022850447334349156,
"signal/frontier_coverage_10/centered_abs_mean": 0.18264002799987794,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8586805555555556,
"signal/frontier_coverage_10/group_std_mean": 0.2375439763069153,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022830002941191196,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022830002941191196,
"signal/frontier_coverage_15/centered_abs_mean": 0.15461563766002656,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8496527777777778,
"signal/frontier_coverage_15/group_std_mean": 0.20210520327091216,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019326955080032349,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019326955080032349,
"signal/frontier_coverage_20/centered_abs_mean": 0.06322543397545814,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9326388888888889,
"signal/frontier_coverage_20/group_std_mean": 0.08029639273881913,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007903179153800011,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007903179153800011,
"signal/frontier_coverage_25/centered_abs_mean": 0.101860611140728,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9097222222222221,
"signal/frontier_coverage_25/group_std_mean": 0.1299730733036995,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012732576811686157,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012732576811686157,
"signal/frontier_coverage_5/centered_abs_mean": 0.1828035831451416,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8583333333333334,
"signal/frontier_coverage_5/group_std_mean": 0.2377503514289856,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022850447334349156,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022850447334349156,
"signal/frontier_ece_reward/centered_abs_mean": 0.02175499051809311,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7347222222222223,
"signal/frontier_ece_reward/group_std_mean": 0.02739621587097645,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021754990331828592,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021754990331828592,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25911190211772916,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7975694444444444,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3297302842140198,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02591119073331356,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02591119073331356,
"step": 170
},
{
"calibration/aurc": 0.11596802537585997,
"calibration/batch_distribution_entropy": 0.9772162270629645,
"calibration/batch_entropy_100bins": 0.9590083538748349,
"calibration/batch_entropy_10bins": 0.9772162270629645,
"calibration/batch_entropy_50bins": 0.9696121620848036,
"calibration/batch_uniqueness": 0.9508310218025986,
"calibration/buffer_distribution_entropy": 0.985610419410986,
"calibration/buffer_entropy_100bins": 0.9920422218752784,
"calibration/buffer_entropy_10bins": 0.985610419410986,
"calibration/buffer_entropy_50bins": 0.9912291646995455,
"calibration/confidence_entropy": 0.4961519915892487,
"calibration/coverage@0%": 0.08600478646452267,
"calibration/coverage@1%": 0.08600478646452267,
"calibration/coverage@10%": 0.4732605731669655,
"calibration/coverage@15%": 0.7201678681063644,
"calibration/coverage@20%": 0.8435295212461836,
"calibration/coverage@25%": 0.9484174474094432,
"calibration/coverage@30%": 0.9905013192612138,
"calibration/coverage@5%": 0.30627206208368407,
"calibration/ece": 0.18982087542522702,
"calibration/mean_confidence": 0.5509683002177224,
"calibration/prompt_uniqueness": 0.8649400485298095,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013888888888888905,
"completions/max_length": 3743.8,
"completions/max_terminated_length": 3743.8,
"completions/mean_length": 650.516845703125,
"completions/mean_terminated_length": 659.7272583007813,
"completions/min_length": 0.0,
"completions/min_terminated_length": 172.4,
"epoch": 0.4199947500656242,
"grad_norm": 0.0004142906400375068,
"learning_rate": 9.93975903614458e-07,
"loss": -0.0114,
"num_tokens": 350524730.0,
"reward": 1.0014106631278992,
"reward_std": 0.12218387722969055,
"rewards/accuracy_reward": 0.7021701455116272,
"rewards/brier_reward": 0.7892498970031738,
"rewards/confidence_uniqueness_reward": 0.9369961380958557,
"rewards/format_reward": 0.9857638835906982,
"rewards/frontier_aurc_reward": -0.001478305645287037,
"rewards/frontier_coverage_0": -0.006911272555589676,
"rewards/frontier_coverage_1": -0.006911272555589676,
"rewards/frontier_coverage_10": -0.006758286617696285,
"rewards/frontier_coverage_15": 0.0018855141475796699,
"rewards/frontier_coverage_20": 0.05244411379098892,
"rewards/frontier_coverage_25": 0.13218926042318344,
"rewards/frontier_coverage_5": -0.006911272555589676,
"rewards/frontier_ece_reward": -0.00075130017939955,
"rewards/frontier_entropy_batch_reward": -0.1707520604133606,
"signal/accuracy_reward/centered_abs_mean": 0.1440049946308136,
"signal/accuracy_reward/group_bin_occupancy": 0.19618055555555555,
"signal/accuracy_reward/group_std_mean": 0.19517480432987214,
"signal/accuracy_reward/group_zero_std_frac": 0.43055556416511537,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0720024973154068,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0720024973154068,
"signal/advantage_abs_mean": 0.08900740891695022,
"signal/advantage_pre_scale_abs_mean": 0.08900740891695022,
"signal/advantage_pre_scale_std": 0.15330225825309754,
"signal/advantage_std": 0.15330225825309754,
"signal/brier_reward/centered_abs_mean": 0.14266086518764495,
"signal/brier_reward/group_bin_occupancy": 0.8486111111111111,
"signal/brier_reward/group_std_mean": 0.18242388367652893,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01426608581095934,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01426608581095934,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03368383906781673,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8496527777777777,
"signal/confidence_uniqueness_reward/group_std_mean": 0.052182822674512866,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003368384018540382,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003368384018540382,
"signal/format_reward/centered_abs_mean": 0.022667100839316844,
"signal/format_reward/group_bin_occupancy": 0.14340277777777777,
"signal/format_reward/group_std_mean": 0.03894899114966392,
"signal/format_reward/group_zero_std_frac": 0.8527777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011333550419658422,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011333550419658422,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019595161313191055,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6739583333333332,
"signal/frontier_aurc_reward/group_std_mean": 0.003604071820154786,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.449395142321009e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.449395142321009e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18813599050045013,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8409722222222221,
"signal/frontier_coverage_0/group_std_mean": 0.24627106189727782,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023517000023275613,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023517000023275613,
"signal/frontier_coverage_1/centered_abs_mean": 0.18813599050045013,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8409722222222221,
"signal/frontier_coverage_1/group_std_mean": 0.24627106189727782,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023517000023275613,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023517000023275613,
"signal/frontier_coverage_10/centered_abs_mean": 0.18753766417503356,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8399305555555555,
"signal/frontier_coverage_10/group_std_mean": 0.24552586376667024,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002344220783561468,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002344220783561468,
"signal/frontier_coverage_15/centered_abs_mean": 0.14854539334774017,
"signal/frontier_coverage_15/group_bin_occupancy": 0.834375,
"signal/frontier_coverage_15/group_std_mean": 0.19552876353263854,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018568174680694937,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018568174680694937,
"signal/frontier_coverage_20/centered_abs_mean": 0.06628052592277527,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9263888888888889,
"signal/frontier_coverage_20/group_std_mean": 0.08446042537689209,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008285066462121904,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008285066462121904,
"signal/frontier_coverage_25/centered_abs_mean": 0.10350020378828048,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8947916666666668,
"signal/frontier_coverage_25/group_std_mean": 0.13349340260028839,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012937525752931833,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012937525752931833,
"signal/frontier_coverage_5/centered_abs_mean": 0.18813599050045013,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8409722222222221,
"signal/frontier_coverage_5/group_std_mean": 0.24627106189727782,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023517000023275613,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023517000023275613,
"signal/frontier_ece_reward/centered_abs_mean": 0.021230778843164443,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7291666666666667,
"signal/frontier_ece_reward/group_std_mean": 0.02696690522134304,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002123078005388379,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002123078005388379,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2259067177772522,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.775,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.28947545886039733,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022590672224760057,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022590672224760057,
"step": 175
},
{
"calibration/aurc": 0.10141793628292742,
"calibration/batch_distribution_entropy": 0.97102851134233,
"calibration/batch_entropy_100bins": 0.9559282496757466,
"calibration/batch_entropy_10bins": 0.97102851134233,
"calibration/batch_entropy_50bins": 0.9685106670847736,
"calibration/batch_uniqueness": 0.9508384189223701,
"calibration/buffer_distribution_entropy": 0.9859982317210336,
"calibration/buffer_entropy_100bins": 0.9922246706460343,
"calibration/buffer_entropy_10bins": 0.9859982317210336,
"calibration/buffer_entropy_50bins": 0.9914588219746386,
"calibration/confidence_entropy": 0.49898035500245025,
"calibration/coverage@0%": 0.06338339682072068,
"calibration/coverage@1%": 0.13005006348738732,
"calibration/coverage@10%": 0.5578450042867328,
"calibration/coverage@15%": 0.8041507799553624,
"calibration/coverage@20%": 0.9102453475329633,
"calibration/coverage@25%": 0.9551202557445988,
"calibration/coverage@30%": 0.9805774278215222,
"calibration/coverage@5%": 0.2749898127789967,
"calibration/ece": 0.1917915387935666,
"calibration/mean_confidence": 0.5808151819529532,
"calibration/prompt_uniqueness": 0.8569296263730571,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014409722222222232,
"completions/max_length": 3805.4,
"completions/max_terminated_length": 3805.4,
"completions/mean_length": 637.946533203125,
"completions/mean_terminated_length": 647.2637451171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 199.8,
"epoch": 0.4319946000674992,
"grad_norm": 0.0003819975827354938,
"learning_rate": 8.433734939759036e-07,
"loss": -0.0126,
"num_tokens": 360973842.0,
"reward": 1.0002532005310059,
"reward_std": 0.12911737263202666,
"rewards/accuracy_reward": 0.7065972208976745,
"rewards/brier_reward": 0.7953470587730408,
"rewards/confidence_uniqueness_reward": 0.9346136093139649,
"rewards/format_reward": 0.9855902910232544,
"rewards/frontier_aurc_reward": -0.0019531417870894074,
"rewards/frontier_coverage_0": -0.002460658084601164,
"rewards/frontier_coverage_1": -0.002460658084601164,
"rewards/frontier_coverage_10": -0.002041639015078545,
"rewards/frontier_coverage_15": 0.008680144883692264,
"rewards/frontier_coverage_20": 0.06160227060317993,
"rewards/frontier_coverage_25": 0.1450663238763809,
"rewards/frontier_coverage_5": -0.002460658084601164,
"rewards/frontier_ece_reward": -0.0004450877895578742,
"rewards/frontier_entropy_batch_reward": -0.21341712474823,
"signal/accuracy_reward/centered_abs_mean": 0.15309244394302368,
"signal/accuracy_reward/group_bin_occupancy": 0.19340277777777778,
"signal/accuracy_reward/group_std_mean": 0.19739371538162231,
"signal/accuracy_reward/group_zero_std_frac": 0.45277778506278993,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07654622197151184,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07654622197151184,
"signal/advantage_abs_mean": 0.09620479941368103,
"signal/advantage_pre_scale_abs_mean": 0.09620479941368103,
"signal/advantage_pre_scale_std": 0.16078065931797028,
"signal/advantage_std": 0.16078065931797028,
"signal/brier_reward/centered_abs_mean": 0.13989888727664948,
"signal/brier_reward/group_bin_occupancy": 0.835763888888889,
"signal/brier_reward/group_std_mean": 0.18042805790901184,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013989889249205589,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013989889249205589,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03554730340838432,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8277777777777778,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05846796631813049,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003554730489850044,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003554730489850044,
"signal/format_reward/centered_abs_mean": 0.02422960065305233,
"signal/format_reward/group_bin_occupancy": 0.1482638888888889,
"signal/format_reward/group_std_mean": 0.04518317058682442,
"signal/format_reward/group_zero_std_frac": 0.8138888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012114800326526166,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012114800326526166,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024452964775264264,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6625,
"signal/frontier_aurc_reward/group_std_mean": 0.004462533164769411,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.056620480492711e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.056620480492711e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18128202557563783,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8354166666666668,
"signal/frontier_coverage_0/group_std_mean": 0.23500166237354278,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002266025450080633,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002266025450080633,
"signal/frontier_coverage_1/centered_abs_mean": 0.18128202557563783,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8354166666666668,
"signal/frontier_coverage_1/group_std_mean": 0.23500166237354278,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002266025450080633,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002266025450080633,
"signal/frontier_coverage_10/centered_abs_mean": 0.17996532022953032,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8336805555555555,
"signal/frontier_coverage_10/group_std_mean": 0.23336804807186126,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002249566651880741,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002249566651880741,
"signal/frontier_coverage_15/centered_abs_mean": 0.11691176444292069,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8347222222222224,
"signal/frontier_coverage_15/group_std_mean": 0.15411899387836456,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00146139704156667,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00146139704156667,
"signal/frontier_coverage_20/centered_abs_mean": 0.06802540868520737,
"signal/frontier_coverage_20/group_bin_occupancy": 0.928125,
"signal/frontier_coverage_20/group_std_mean": 0.08644652813673019,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008503176271915436,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008503176271915436,
"signal/frontier_coverage_25/centered_abs_mean": 0.11125858575105667,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8972222222222221,
"signal/frontier_coverage_25/group_std_mean": 0.1438766151666641,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013907323591411114,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013907323591411114,
"signal/frontier_coverage_5/centered_abs_mean": 0.18128202557563783,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8354166666666668,
"signal/frontier_coverage_5/group_std_mean": 0.23500166237354278,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002266025450080633,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002266025450080633,
"signal/frontier_ece_reward/centered_abs_mean": 0.020656683668494224,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7170138888888888,
"signal/frontier_ece_reward/group_std_mean": 0.02589767798781395,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020656683016568424,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020656683016568424,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2591008573770523,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7864583333333333,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32451775670051575,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025910085812211037,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025910085812211037,
"step": 180
},
{
"calibration/aurc": 0.1956443021676986,
"calibration/batch_distribution_entropy": 0.9721898543770948,
"calibration/batch_entropy_100bins": 0.9582683246927992,
"calibration/batch_entropy_10bins": 0.9721898543770948,
"calibration/batch_entropy_50bins": 0.9690488174101344,
"calibration/batch_uniqueness": 0.9514666032368121,
"calibration/buffer_distribution_entropy": 0.9854666800775806,
"calibration/buffer_entropy_100bins": 0.9919698091057843,
"calibration/buffer_entropy_10bins": 0.9854666800775806,
"calibration/buffer_entropy_50bins": 0.991146072484287,
"calibration/confidence_entropy": 0.5068397741253167,
"calibration/coverage@0%": 0.013618119773481246,
"calibration/coverage@1%": 0.013618119773481246,
"calibration/coverage@10%": 0.1095949729010646,
"calibration/coverage@15%": 0.5379352359546358,
"calibration/coverage@20%": 0.6796718053797843,
"calibration/coverage@25%": 0.8897106640947919,
"calibration/coverage@30%": 0.9510526315789474,
"calibration/coverage@5%": 0.02723068521850743,
"calibration/ece": 0.2253786384553338,
"calibration/mean_confidence": 0.5595226877564153,
"calibration/prompt_uniqueness": 0.8615784712587196,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012152777777777745,
"completions/max_length": 3242.0,
"completions/max_terminated_length": 3242.0,
"completions/mean_length": 643.6135498046875,
"completions/mean_terminated_length": 651.6073974609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 184.8,
"epoch": 0.44399445006937416,
"grad_norm": 0.00043083218042738736,
"learning_rate": 6.927710843373495e-07,
"loss": -0.0099,
"num_tokens": 371478318.0,
"reward": 0.9993168830871582,
"reward_std": 0.1252796620130539,
"rewards/accuracy_reward": 0.6919270873069763,
"rewards/brier_reward": 0.7822542548179626,
"rewards/confidence_uniqueness_reward": 0.940218985080719,
"rewards/format_reward": 0.9877604126930237,
"rewards/frontier_aurc_reward": -0.001636920589953661,
"rewards/frontier_coverage_0": -0.009225619398057461,
"rewards/frontier_coverage_1": -0.009225619398057461,
"rewards/frontier_coverage_10": -0.00855890940874815,
"rewards/frontier_coverage_15": 0.014033466950058937,
"rewards/frontier_coverage_20": 0.05618218407034874,
"rewards/frontier_coverage_25": 0.12737512439489365,
"rewards/frontier_coverage_5": -0.009225619398057461,
"rewards/frontier_ece_reward": -0.0031481004785746335,
"rewards/frontier_entropy_batch_reward": -0.14455921649932862,
"signal/accuracy_reward/centered_abs_mean": 0.15904405415058137,
"signal/accuracy_reward/group_bin_occupancy": 0.19548611111111108,
"signal/accuracy_reward/group_std_mean": 0.20459264814853667,
"signal/accuracy_reward/group_zero_std_frac": 0.43611111044883727,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07952202707529069,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07952202707529069,
"signal/advantage_abs_mean": 0.0955008551478386,
"signal/advantage_pre_scale_abs_mean": 0.0955008551478386,
"signal/advantage_pre_scale_std": 0.1540976881980896,
"signal/advantage_std": 0.1540976881980896,
"signal/brier_reward/centered_abs_mean": 0.14529342353343963,
"signal/brier_reward/group_bin_occupancy": 0.8604166666666666,
"signal/brier_reward/group_std_mean": 0.18353629410266875,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014529342763125896,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014529342763125896,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03009340800344944,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8524305555555556,
"signal/confidence_uniqueness_reward/group_std_mean": 0.046408722549676894,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030093408189713956,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030093408189713956,
"signal/format_reward/centered_abs_mean": 0.01939561627805233,
"signal/format_reward/group_bin_occupancy": 0.14131944444444441,
"signal/format_reward/group_std_mean": 0.03335440866649151,
"signal/format_reward/group_zero_std_frac": 0.8694444417953491,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009697808139026164,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009697808139026164,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020065686898306013,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6559027777777777,
"signal/frontier_aurc_reward/group_std_mean": 0.0037283867597579954,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.508210891392082e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.508210891392082e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1982041120529175,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8604166666666666,
"signal/frontier_coverage_0/group_std_mean": 0.2535953104496002,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024775514844805,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024775514844805,
"signal/frontier_coverage_1/centered_abs_mean": 0.1982041120529175,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8604166666666666,
"signal/frontier_coverage_1/group_std_mean": 0.2535953104496002,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024775514844805,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024775514844805,
"signal/frontier_coverage_10/centered_abs_mean": 0.195833483338356,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8583333333333332,
"signal/frontier_coverage_10/group_std_mean": 0.2506587952375412,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024479186162352563,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024479186162352563,
"signal/frontier_coverage_15/centered_abs_mean": 0.10796615332365037,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8600694444444444,
"signal/frontier_coverage_15/group_std_mean": 0.14121497869491578,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001349576935172081,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001349576935172081,
"signal/frontier_coverage_20/centered_abs_mean": 0.06482557505369187,
"signal/frontier_coverage_20/group_bin_occupancy": 0.91875,
"signal/frontier_coverage_20/group_std_mean": 0.0837198704481125,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008103197091259062,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008103197091259062,
"signal/frontier_coverage_25/centered_abs_mean": 0.10678046792745591,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_25/group_std_mean": 0.1394643157720566,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001334755914285779,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001334755914285779,
"signal/frontier_coverage_5/centered_abs_mean": 0.1982041120529175,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8604166666666666,
"signal/frontier_coverage_5/group_std_mean": 0.2535953104496002,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024775514844805,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024775514844805,
"signal/frontier_ece_reward/centered_abs_mean": 0.02167145274579525,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7135416666666667,
"signal/frontier_ece_reward/group_std_mean": 0.026740428060293198,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021671453956514596,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021671453956514596,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2169477492570877,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7670138888888889,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.28880282044410704,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02169477492570877,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02169477492570877,
"step": 185
},
{
"calibration/aurc": 0.16203958912445476,
"calibration/batch_distribution_entropy": 0.9564638809055342,
"calibration/batch_entropy_100bins": 0.9504243671299063,
"calibration/batch_entropy_10bins": 0.9564638809055342,
"calibration/batch_entropy_50bins": 0.9588082852441188,
"calibration/batch_uniqueness": 0.9485751236738553,
"calibration/buffer_distribution_entropy": 0.9855318683814028,
"calibration/buffer_entropy_100bins": 0.9920046840716885,
"calibration/buffer_entropy_10bins": 0.9855318683814028,
"calibration/buffer_entropy_50bins": 0.9911690583473515,
"calibration/confidence_entropy": 0.5012853403916044,
"calibration/coverage@0%": 0.05780618281758908,
"calibration/coverage@1%": 0.05780618281758908,
"calibration/coverage@10%": 0.31838293083869723,
"calibration/coverage@15%": 0.4790459460921778,
"calibration/coverage@20%": 0.5617222445744519,
"calibration/coverage@25%": 0.9087071261402514,
"calibration/coverage@30%": 0.9716345096745822,
"calibration/coverage@5%": 0.1443786218306891,
"calibration/ece": 0.17111073876394964,
"calibration/mean_confidence": 0.6004809067734467,
"calibration/prompt_uniqueness": 0.8665386519176552,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009461805555555536,
"completions/max_length": 3465.6,
"completions/max_terminated_length": 3465.6,
"completions/mean_length": 627.6765625,
"completions/mean_terminated_length": 633.6748168945312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 181.6,
"epoch": 0.45599430007124914,
"grad_norm": 0.0004201448755338788,
"learning_rate": 5.421686746987952e-07,
"loss": -0.0078,
"num_tokens": 381792096.0,
"reward": 1.016054892539978,
"reward_std": 0.12401713877916336,
"rewards/accuracy_reward": 0.7311631917953492,
"rewards/brier_reward": 0.800844419002533,
"rewards/confidence_uniqueness_reward": 0.9397600173950196,
"rewards/format_reward": 0.9903645753860474,
"rewards/frontier_aurc_reward": -0.0016162074403837322,
"rewards/frontier_coverage_0": -0.015276820957660675,
"rewards/frontier_coverage_1": -0.015276820957660675,
"rewards/frontier_coverage_10": -0.014098763652145862,
"rewards/frontier_coverage_15": 0.01533528920263052,
"rewards/frontier_coverage_20": 0.07684787213802338,
"rewards/frontier_coverage_25": 0.1640935003757477,
"rewards/frontier_coverage_5": -0.015273858606815339,
"rewards/frontier_ece_reward": -0.0036209038575179876,
"rewards/frontier_entropy_batch_reward": -0.20841516852378844,
"signal/accuracy_reward/centered_abs_mean": 0.15256619155406953,
"signal/accuracy_reward/group_bin_occupancy": 0.19826388888888888,
"signal/accuracy_reward/group_std_mean": 0.20278047025203705,
"signal/accuracy_reward/group_zero_std_frac": 0.4138888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07628309577703477,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07628309577703477,
"signal/advantage_abs_mean": 0.09019981622695923,
"signal/advantage_pre_scale_abs_mean": 0.09019981622695923,
"signal/advantage_pre_scale_std": 0.15100550949573516,
"signal/advantage_std": 0.15100550949573516,
"signal/brier_reward/centered_abs_mean": 0.13891661763191224,
"signal/brier_reward/group_bin_occupancy": 0.8350694444444444,
"signal/brier_reward/group_std_mean": 0.17901506423950195,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01389166172593832,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01389166172593832,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028540104255080224,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8395833333333333,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04933372884988785,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028540104161947966,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028540104161947966,
"signal/format_reward/centered_abs_mean": 0.01725802943110466,
"signal/format_reward/group_bin_occupancy": 0.14479166666666668,
"signal/format_reward/group_std_mean": 0.03579398356378079,
"signal/format_reward/group_zero_std_frac": 0.8416666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00862901471555233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00862901471555233,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020792306633666156,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6552083333333333,
"signal/frontier_aurc_reward/group_std_mean": 0.0039239289239048954,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5990382710006088e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5990382710006088e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18381263613700866,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8267361111111111,
"signal/frontier_coverage_0/group_std_mean": 0.23998367488384248,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022976579144597053,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022976579144597053,
"signal/frontier_coverage_1/centered_abs_mean": 0.18381263613700866,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8267361111111111,
"signal/frontier_coverage_1/group_std_mean": 0.23998367488384248,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022976579144597053,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022976579144597053,
"signal/frontier_coverage_10/centered_abs_mean": 0.1811767816543579,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8246527777777779,
"signal/frontier_coverage_10/group_std_mean": 0.2366650640964508,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022647099569439886,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022647099569439886,
"signal/frontier_coverage_15/centered_abs_mean": 0.09005323797464371,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8552083333333333,
"signal/frontier_coverage_15/group_std_mean": 0.11996055394411087,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011256654281169177,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011256654281169177,
"signal/frontier_coverage_20/centered_abs_mean": 0.07242253422737122,
"signal/frontier_coverage_20/group_bin_occupancy": 0.923611111111111,
"signal/frontier_coverage_20/group_std_mean": 0.09198382496833801,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009052816778421402,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009052816778421402,
"signal/frontier_coverage_25/centered_abs_mean": 0.11942882239818572,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8909722222222222,
"signal/frontier_coverage_25/group_std_mean": 0.15319141745567322,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014928602380678059,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014928602380678059,
"signal/frontier_coverage_5/centered_abs_mean": 0.18380914330482484,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8267361111111111,
"signal/frontier_coverage_5/group_std_mean": 0.23997901380062103,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002297614235430956,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002297614235430956,
"signal/frontier_ece_reward/centered_abs_mean": 0.02063850834965706,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6902777777777778,
"signal/frontier_ece_reward/group_std_mean": 0.02570592537522316,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002063850755803287,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002063850755803287,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2538691431283951,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7777777777777779,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3231283605098724,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025386914610862732,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025386914610862732,
"step": 190
},
{
"calibration/aurc": 0.1961372764660094,
"calibration/batch_distribution_entropy": 0.9787626043754896,
"calibration/batch_entropy_100bins": 0.9610797936801457,
"calibration/batch_entropy_10bins": 0.9787626043754896,
"calibration/batch_entropy_50bins": 0.9740262499384797,
"calibration/batch_uniqueness": 0.952937991093231,
"calibration/buffer_distribution_entropy": 0.9854658504578813,
"calibration/buffer_entropy_100bins": 0.9919799868819045,
"calibration/buffer_entropy_10bins": 0.9854658504578813,
"calibration/buffer_entropy_50bins": 0.9911334418833307,
"calibration/confidence_entropy": 0.4938269497363332,
"calibration/coverage@0%": 0.017848624480017818,
"calibration/coverage@1%": 0.017848624480017818,
"calibration/coverage@10%": 0.3216058794764497,
"calibration/coverage@15%": 0.44077104008081436,
"calibration/coverage@20%": 0.5248942584977563,
"calibration/coverage@25%": 0.7557016752611363,
"calibration/coverage@30%": 0.8214413911949145,
"calibration/coverage@5%": 0.14017045767584957,
"calibration/ece": 0.17108793119108695,
"calibration/mean_confidence": 0.5567763002783004,
"calibration/prompt_uniqueness": 0.8632525025551473,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012152777777777768,
"completions/max_length": 3528.2,
"completions/max_terminated_length": 3528.2,
"completions/mean_length": 654.6660522460937,
"completions/mean_terminated_length": 662.805517578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 188.2,
"epoch": 0.46799415007312406,
"grad_norm": 0.0004228286852594465,
"learning_rate": 3.91566265060241e-07,
"loss": -0.0096,
"num_tokens": 392414713.0,
"reward": 0.9901637196540832,
"reward_std": 0.12346882373094559,
"rewards/accuracy_reward": 0.6758680582046509,
"rewards/brier_reward": 0.7898530125617981,
"rewards/confidence_uniqueness_reward": 0.9386414170265198,
"rewards/format_reward": 0.9878472208976745,
"rewards/frontier_aurc_reward": -0.0017654816154390573,
"rewards/frontier_coverage_0": 0.008542282739654183,
"rewards/frontier_coverage_1": 0.008542282739654183,
"rewards/frontier_coverage_10": 0.00909471595659852,
"rewards/frontier_coverage_15": 0.0248194869607687,
"rewards/frontier_coverage_20": 0.07406894192099571,
"rewards/frontier_coverage_25": 0.14769483357667923,
"rewards/frontier_coverage_5": 0.008551673218607902,
"rewards/frontier_ece_reward": -0.0010886201984249056,
"rewards/frontier_entropy_batch_reward": -0.1792888253927231,
"signal/accuracy_reward/centered_abs_mean": 0.14333767145872117,
"signal/accuracy_reward/group_bin_occupancy": 0.196875,
"signal/accuracy_reward/group_std_mean": 0.1954125940799713,
"signal/accuracy_reward/group_zero_std_frac": 0.425,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07166883572936059,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07166883572936059,
"signal/advantage_abs_mean": 0.0889064148068428,
"signal/advantage_pre_scale_abs_mean": 0.0889064148068428,
"signal/advantage_pre_scale_std": 0.14890001118183135,
"signal/advantage_std": 0.14890001118183135,
"signal/brier_reward/centered_abs_mean": 0.14416175484657287,
"signal/brier_reward/group_bin_occupancy": 0.8413194444444445,
"signal/brier_reward/group_std_mean": 0.18464682400226592,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01441617514938116,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01441617514938116,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03026603311300278,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8399305555555555,
"signal/confidence_uniqueness_reward/group_std_mean": 0.050313469022512436,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003026603301987052,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003026603301987052,
"signal/format_reward/centered_abs_mean": 0.019162326864898205,
"signal/format_reward/group_bin_occupancy": 0.14479166666666665,
"signal/format_reward/group_std_mean": 0.036996308341622354,
"signal/format_reward/group_zero_std_frac": 0.8416666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009581163432449103,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009581163432449103,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021472658263519406,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6767361111111111,
"signal/frontier_aurc_reward/group_std_mean": 0.0038169473875313996,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.684082428459078e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.684082428459078e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1832861989736557,
"signal/frontier_coverage_0/group_bin_occupancy": 0.845486111111111,
"signal/frontier_coverage_0/group_std_mean": 0.24093341827392578,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022910774918273092,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022910774918273092,
"signal/frontier_coverage_1/centered_abs_mean": 0.1832861989736557,
"signal/frontier_coverage_1/group_bin_occupancy": 0.845486111111111,
"signal/frontier_coverage_1/group_std_mean": 0.24093341827392578,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022910774918273092,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022910774918273092,
"signal/frontier_coverage_10/centered_abs_mean": 0.18060127198696135,
"signal/frontier_coverage_10/group_bin_occupancy": 0.84375,
"signal/frontier_coverage_10/group_std_mean": 0.2375454157590866,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022575160022825004,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022575160022825004,
"signal/frontier_coverage_15/centered_abs_mean": 0.08088361173868179,
"signal/frontier_coverage_15/group_bin_occupancy": 0.867013888888889,
"signal/frontier_coverage_15/group_std_mean": 0.10819252133369446,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010110451141372323,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010110451141372323,
"signal/frontier_coverage_20/centered_abs_mean": 0.07415172904729843,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9243055555555555,
"signal/frontier_coverage_20/group_std_mean": 0.09439714550971985,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009268965688534081,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009268965688534081,
"signal/frontier_coverage_25/centered_abs_mean": 0.11941829919815064,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8850694444444445,
"signal/frontier_coverage_25/group_std_mean": 0.15383650064468385,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014927288517355918,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014927288517355918,
"signal/frontier_coverage_5/centered_abs_mean": 0.18320149779319764,
"signal/frontier_coverage_5/group_bin_occupancy": 0.845486111111111,
"signal/frontier_coverage_5/group_std_mean": 0.24082353413105012,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002290018741041422,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002290018741041422,
"signal/frontier_ece_reward/centered_abs_mean": 0.01998976320028305,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6802083333333333,
"signal/frontier_ece_reward/group_std_mean": 0.02510824017226696,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001998976385220885,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001998976385220885,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23103305995464324,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7687499999999999,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.30060619711875913,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023103305697441102,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023103305697441102,
"step": 195
},
{
"calibration/aurc": 0.14460988695922733,
"calibration/batch_distribution_entropy": 0.9495513680360439,
"calibration/batch_entropy_100bins": 0.948140998369011,
"calibration/batch_entropy_10bins": 0.9495513680360439,
"calibration/batch_entropy_50bins": 0.9562598495698762,
"calibration/batch_uniqueness": 0.9474501559632424,
"calibration/buffer_distribution_entropy": 0.9853301528282403,
"calibration/buffer_entropy_100bins": 0.9919136390750427,
"calibration/buffer_entropy_10bins": 0.9853301528282403,
"calibration/buffer_entropy_50bins": 0.9910459470294087,
"calibration/confidence_entropy": 0.5112296963776538,
"calibration/coverage@0%": 0.032981623612241454,
"calibration/coverage@1%": 0.032981623612241454,
"calibration/coverage@10%": 0.46781779581637994,
"calibration/coverage@15%": 0.5944394938795089,
"calibration/coverage@20%": 0.7251206752410088,
"calibration/coverage@25%": 0.9559466293867314,
"calibration/coverage@30%": 0.9758530183727034,
"calibration/coverage@5%": 0.08477990939682992,
"calibration/ece": 0.18029392485104195,
"calibration/mean_confidence": 0.6085970431868855,
"calibration/prompt_uniqueness": 0.867308580953482,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009548611111111138,
"completions/max_length": 3404.4,
"completions/max_terminated_length": 3404.4,
"completions/mean_length": 633.7380249023438,
"completions/mean_terminated_length": 639.8541870117188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 195.2,
"epoch": 0.47999400007499904,
"grad_norm": 0.00048563332529738545,
"learning_rate": 2.409638554216868e-07,
"loss": -0.0069,
"num_tokens": 402783183.0,
"reward": 1.0019099831581115,
"reward_std": 0.12241675555706025,
"rewards/accuracy_reward": 0.7002604126930236,
"rewards/brier_reward": 0.7927653312683105,
"rewards/confidence_uniqueness_reward": 0.9402285814285278,
"rewards/format_reward": 0.9902777671813965,
"rewards/frontier_aurc_reward": -0.0022210155380889773,
"rewards/frontier_coverage_0": -0.0035953870275989173,
"rewards/frontier_coverage_1": -0.0035953870275989173,
"rewards/frontier_coverage_10": -0.0024497059057466686,
"rewards/frontier_coverage_15": 0.02303452733904123,
"rewards/frontier_coverage_20": 0.07562950998544693,
"rewards/frontier_coverage_25": 0.15210793316364288,
"rewards/frontier_coverage_5": -0.003559676537406631,
"rewards/frontier_ece_reward": -0.003188342018984258,
"rewards/frontier_entropy_batch_reward": -0.19281545877456666,
"signal/accuracy_reward/centered_abs_mean": 0.1425075948238373,
"signal/accuracy_reward/group_bin_occupancy": 0.190625,
"signal/accuracy_reward/group_std_mean": 0.18748272955417633,
"signal/accuracy_reward/group_zero_std_frac": 0.47500001192092894,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07125379741191865,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07125379741191865,
"signal/advantage_abs_mean": 0.09010151475667953,
"signal/advantage_pre_scale_abs_mean": 0.09010151475667953,
"signal/advantage_pre_scale_std": 0.14910052120685577,
"signal/advantage_std": 0.14910052120685577,
"signal/brier_reward/centered_abs_mean": 0.14125451743602752,
"signal/brier_reward/group_bin_occupancy": 0.8416666666666668,
"signal/brier_reward/group_std_mean": 0.1821454256772995,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014125452749431134,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014125452749431134,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028338390961289407,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.84375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04806696176528931,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002833839226514101,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002833839226514101,
"signal/format_reward/centered_abs_mean": 0.01692708358168602,
"signal/format_reward/group_bin_occupancy": 0.14375,
"signal/format_reward/group_std_mean": 0.03435967043042183,
"signal/format_reward/group_zero_std_frac": 0.85,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00846354179084301,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00846354179084301,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002712964592501521,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6565972222222222,
"signal/frontier_aurc_reward/group_std_mean": 0.005087446887046099,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.391205755178817e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.391205755178817e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.17483938336372376,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8496527777777777,
"signal/frontier_coverage_0/group_std_mean": 0.2283846229314804,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00218549226410687,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00218549226410687,
"signal/frontier_coverage_1/centered_abs_mean": 0.17483938336372376,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8496527777777777,
"signal/frontier_coverage_1/group_std_mean": 0.2283846229314804,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00218549226410687,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00218549226410687,
"signal/frontier_coverage_10/centered_abs_mean": 0.17156257033348082,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8479166666666667,
"signal/frontier_coverage_10/group_std_mean": 0.22431055903434755,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002144532185047865,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002144532185047865,
"signal/frontier_coverage_15/centered_abs_mean": 0.07161953896284104,
"signal/frontier_coverage_15/group_bin_occupancy": 0.88125,
"signal/frontier_coverage_15/group_std_mean": 0.09553508013486862,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008952441858127713,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008952441858127713,
"signal/frontier_coverage_20/centered_abs_mean": 0.07612911015748977,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9177083333333332,
"signal/frontier_coverage_20/group_std_mean": 0.0976193055510521,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000951613939832896,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000951613939832896,
"signal/frontier_coverage_25/centered_abs_mean": 0.12673709094524382,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9010416666666666,
"signal/frontier_coverage_25/group_std_mean": 0.16289995312690736,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015842135995626449,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015842135995626449,
"signal/frontier_coverage_5/centered_abs_mean": 0.1747281402349472,
"signal/frontier_coverage_5/group_bin_occupancy": 0.85,
"signal/frontier_coverage_5/group_std_mean": 0.22824438512325287,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021841016598045824,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021841016598045824,
"signal/frontier_ece_reward/centered_abs_mean": 0.01971760131418705,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6673611111111111,
"signal/frontier_ece_reward/group_std_mean": 0.024473632127046584,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001971760136075318,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001971760136075318,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2508280843496323,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7565972222222223,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3208978533744812,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02508280873298645,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02508280873298645,
"step": 200
},
{
"epoch": 0.47999400007499904,
"eval_calibration/aurc": 0.1313149807340917,
"eval_calibration/batch_distribution_entropy": 0.9010256730205485,
"eval_calibration/batch_entropy_100bins": 0.705057482475523,
"eval_calibration/batch_entropy_10bins": 0.9010256730205485,
"eval_calibration/batch_entropy_50bins": 0.7709351303877786,
"eval_calibration/batch_uniqueness": 0.8971089956208811,
"eval_calibration/buffer_distribution_entropy": 0.9850070003261059,
"eval_calibration/buffer_entropy_100bins": 0.991778938093653,
"eval_calibration/buffer_entropy_10bins": 0.9850070003261059,
"eval_calibration/buffer_entropy_50bins": 0.9908763990301034,
"eval_calibration/confidence_entropy": 0.4905625738638028,
"eval_calibration/coverage@0%": 0.245127688172043,
"eval_calibration/coverage@1%": 0.245127688172043,
"eval_calibration/coverage@10%": 0.526377688172043,
"eval_calibration/coverage@15%": 0.651377688172043,
"eval_calibration/coverage@20%": 0.814516129032258,
"eval_calibration/coverage@25%": 0.9578293010752689,
"eval_calibration/coverage@30%": 0.9947916666666666,
"eval_calibration/coverage@5%": 0.245127688172043,
"eval_calibration/ece": 0.22274469707586766,
"eval_calibration/mean_confidence": 0.5956210189997321,
"eval_calibration/prompt_uniqueness": 0.8971089956208811,
"eval_completions/clipped_ratio": 0.011284722222222229,
"eval_completions/max_length": 2300.8333333333335,
"eval_completions/max_terminated_length": 2300.8333333333335,
"eval_completions/mean_length": 645.4890747070312,
"eval_completions/mean_terminated_length": 652.927968343099,
"eval_completions/min_length": 50.0,
"eval_completions/min_terminated_length": 223.0,
"eval_loss": 0.0,
"eval_num_tokens": 402783183.0,
"eval_reward": 0.9437916080156962,
"eval_reward_std": 0.243720144033432,
"eval_rewards/accuracy_reward": 0.6909722288449606,
"eval_rewards/brier_reward": 0.7851507067680359,
"eval_rewards/confidence_uniqueness_reward": 0.8838565051555634,
"eval_rewards/format_reward": 0.9869791666666666,
"eval_rewards/frontier_aurc_reward": -0.0024154275791564337,
"eval_rewards/frontier_coverage_0": -0.0033070078740517297,
"eval_rewards/frontier_coverage_1": -0.0033070078740517297,
"eval_rewards/frontier_coverage_10": -0.0025324359691391387,
"eval_rewards/frontier_coverage_15": 0.024358487998445828,
"eval_rewards/frontier_coverage_20": 0.07857182746132214,
"eval_rewards/frontier_coverage_25": 0.1538891519109408,
"eval_rewards/frontier_coverage_5": -0.003267340362071991,
"eval_rewards/frontier_ece_reward": -0.002330610683808724,
"eval_rewards/frontier_entropy_batch_reward": -0.6487665772438049,
"eval_runtime": 207.9871,
"eval_samples_per_second": 4.808,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4146050314108531,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.46133896211783093,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20730251570542654,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20730251570542654,
"eval_signal/advantage_abs_mean": 0.20695754885673523,
"eval_signal/advantage_pre_scale_abs_mean": 0.20695754885673523,
"eval_signal/advantage_pre_scale_std": 0.24312934776147208,
"eval_signal/advantage_std": 0.24312934776147208,
"eval_signal/brier_reward/centered_abs_mean": 0.19702969988187155,
"eval_signal/brier_reward/group_bin_occupancy": 0.8784722222222223,
"eval_signal/brier_reward/group_std_mean": 0.2518209119637807,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019702970360716183,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019702970360716183,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05498677988847097,
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.38888888888888884,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08798779795567195,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005498677957803011,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005498677957803011,
"eval_signal/format_reward/centered_abs_mean": 0.024576822761446238,
"eval_signal/format_reward/group_bin_occupancy": 0.1597222222222222,
"eval_signal/format_reward/group_std_mean": 0.05818357535948356,
"eval_signal/format_reward/group_zero_std_frac": 0.7222222487131754,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.012288411380723119,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.012288411380723119,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004131359804887325,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.5694444444444444,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.009391291804301241,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.16420004714746e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.16420004714746e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.25250349193811417,
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9027777777777778,
"eval_signal/frontier_coverage_0/group_std_mean": 0.3640611221392949,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003156293804446856,
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003156293804446856,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.25250349193811417,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9027777777777778,
"eval_signal/frontier_coverage_1/group_std_mean": 0.3640611221392949,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003156293804446856,
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003156293804446856,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.24736239512761435,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9027777777777778,
"eval_signal/frontier_coverage_10/group_std_mean": 0.3576079159975052,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030920300244664154,
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030920300244664154,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.08761641258994739,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.8854166666666666,
"eval_signal/frontier_coverage_15/group_std_mean": 0.13278244932492575,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010952051864781727,
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010952051864781727,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.1130032017827034,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9305555555555555,
"eval_signal/frontier_coverage_20/group_std_mean": 0.14612068235874176,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014125400533278782,
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014125400533278782,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2178284153342247,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9270833333333334,
"eval_signal/frontier_coverage_25/group_std_mean": 0.2713290477792422,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027228551916778088,
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027228551916778088,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2523079713185628,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9027777777777778,
"eval_signal/frontier_coverage_5/group_std_mean": 0.36381277441978455,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031538497811804214,
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031538497811804214,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.02772780228406191,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8506944444444445,
"eval_signal/frontier_ece_reward/group_std_mean": 0.03577593838175138,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00277278032929947,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00277278032929947,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.322118878364563,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.28472222222222227,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.3369586815436681,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03221188889195522,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03221188889195522,
"eval_steps_per_second": 0.029,
"step": 200
},
{
"calibration/aurc": 0.19286525379114844,
"calibration/batch_distribution_entropy": 0.9508638400230053,
"calibration/batch_entropy_100bins": 0.9472046153786987,
"calibration/batch_entropy_10bins": 0.9508638400230053,
"calibration/batch_entropy_50bins": 0.9586811454627515,
"calibration/batch_uniqueness": 0.9478697952898288,
"calibration/buffer_distribution_entropy": 0.985099606750224,
"calibration/buffer_entropy_100bins": 0.9918358877130297,
"calibration/buffer_entropy_10bins": 0.985099606750224,
"calibration/buffer_entropy_50bins": 0.9909422179315424,
"calibration/confidence_entropy": 0.4997781015447198,
"calibration/coverage@0%": 0.01370268496669971,
"calibration/coverage@1%": 0.01370268496669971,
"calibration/coverage@10%": 0.08365004677399088,
"calibration/coverage@15%": 0.39644238421691963,
"calibration/coverage@20%": 0.611257871294732,
"calibration/coverage@25%": 0.8797210315410284,
"calibration/coverage@30%": 0.939168679577219,
"calibration/coverage@5%": 0.01370268496669971,
"calibration/ece": 0.14749019364217641,
"calibration/mean_confidence": 0.606638199869165,
"calibration/prompt_uniqueness": 0.8656049290865866,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008767361111111116,
"completions/max_length": 3214.0,
"completions/max_terminated_length": 3214.0,
"completions/mean_length": 652.1061767578125,
"completions/mean_terminated_length": 657.898583984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 175.6,
"epoch": 0.491993850076874,
"grad_norm": 0.0003685148840304464,
"learning_rate": 9.036144578313253e-08,
"loss": -0.0055,
"num_tokens": 413361398.0,
"reward": 1.0259872198104858,
"reward_std": 0.11690017282962799,
"rewards/accuracy_reward": 0.7453993082046508,
"rewards/brier_reward": 0.7930923223495483,
"rewards/confidence_uniqueness_reward": 0.9420808672904968,
"rewards/format_reward": 0.9911458373069764,
"rewards/frontier_aurc_reward": -0.0016583121148869395,
"rewards/frontier_coverage_0": -0.030156330950558186,
"rewards/frontier_coverage_1": -0.030156330950558186,
"rewards/frontier_coverage_10": -0.028441790863871573,
"rewards/frontier_coverage_15": 0.021205396763980387,
"rewards/frontier_coverage_20": 0.08911058455705642,
"rewards/frontier_coverage_25": 0.17736924588680267,
"rewards/frontier_coverage_5": -0.0300977423787117,
"rewards/frontier_ece_reward": -0.006220728810876608,
"rewards/frontier_entropy_batch_reward": -0.17270275056362153,
"signal/accuracy_reward/centered_abs_mean": 0.14183485507965088,
"signal/accuracy_reward/group_bin_occupancy": 0.1951388888888889,
"signal/accuracy_reward/group_std_mean": 0.19232783019542693,
"signal/accuracy_reward/group_zero_std_frac": 0.4388888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07091742753982544,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07091742753982544,
"signal/advantage_abs_mean": 0.0845324456691742,
"signal/advantage_pre_scale_abs_mean": 0.0845324456691742,
"signal/advantage_pre_scale_std": 0.1428141325712204,
"signal/advantage_std": 0.1428141325712204,
"signal/brier_reward/centered_abs_mean": 0.1418829470872879,
"signal/brier_reward/group_bin_occupancy": 0.84375,
"signal/brier_reward/group_std_mean": 0.18193072974681854,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014188294671475888,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014188294671475888,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026093775033950807,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.865625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04311029836535454,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026093775872141124,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026093775872141124,
"signal/format_reward/centered_abs_mean": 0.014822048880159856,
"signal/format_reward/group_bin_occupancy": 0.140625,
"signal/format_reward/group_std_mean": 0.029389195144176483,
"signal/format_reward/group_zero_std_frac": 0.875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007411024440079928,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007411024440079928,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002050434215925634,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6666666666666667,
"signal/frontier_aurc_reward/group_std_mean": 0.003803052147850394,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.563042944530025e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.563042944530025e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18497320711612703,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8427083333333334,
"signal/frontier_coverage_0/group_std_mean": 0.24077284038066865,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002312165219336748,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002312165219336748,
"signal/frontier_coverage_1/centered_abs_mean": 0.18497320711612703,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8427083333333334,
"signal/frontier_coverage_1/group_std_mean": 0.24077284038066865,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002312165219336748,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002312165219336748,
"signal/frontier_coverage_10/centered_abs_mean": 0.18142623901367189,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8402777777777777,
"signal/frontier_coverage_10/group_std_mean": 0.23639352917671203,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022678279783576727,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022678279783576727,
"signal/frontier_coverage_15/centered_abs_mean": 0.07308610081672669,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8972222222222221,
"signal/frontier_coverage_15/group_std_mean": 0.09623065441846848,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009135762811638415,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009135762811638415,
"signal/frontier_coverage_20/centered_abs_mean": 0.07907227426767349,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9204861111111112,
"signal/frontier_coverage_20/group_std_mean": 0.10045547783374786,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009884034050628542,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009884034050628542,
"signal/frontier_coverage_25/centered_abs_mean": 0.12448778450489044,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8934027777777779,
"signal/frontier_coverage_25/group_std_mean": 0.1596580684185028,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015560972038656472,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015560972038656472,
"signal/frontier_coverage_5/centered_abs_mean": 0.18486446142196655,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8427083333333334,
"signal/frontier_coverage_5/group_std_mean": 0.24063428342342377,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002310805721208453,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002310805721208453,
"signal/frontier_ece_reward/centered_abs_mean": 0.020203196629881858,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6336805555555556,
"signal/frontier_ece_reward/group_std_mean": 0.02513127215206623,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020203196443617346,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020203196443617346,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22744437754154206,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7774305555555555,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2937332093715668,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022744438052177428,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022744438052177428,
"step": 205
},
{
"calibration/aurc": 0.10604995661886611,
"calibration/batch_distribution_entropy": 0.9459823056432164,
"calibration/batch_entropy_100bins": 0.9445751943961431,
"calibration/batch_entropy_10bins": 0.9459823056432164,
"calibration/batch_entropy_50bins": 0.9524671961811779,
"calibration/batch_uniqueness": 0.945795930081056,
"calibration/buffer_distribution_entropy": 0.9843894309209239,
"calibration/buffer_entropy_100bins": 0.991489315771584,
"calibration/buffer_entropy_10bins": 0.9843894309209239,
"calibration/buffer_entropy_50bins": 0.9905221962633863,
"calibration/confidence_entropy": 0.49619946668569376,
"calibration/coverage@0%": 0.0810229902207145,
"calibration/coverage@1%": 0.0810229902207145,
"calibration/coverage@10%": 0.5336122948397576,
"calibration/coverage@15%": 0.7811456966373963,
"calibration/coverage@20%": 0.8943865740740741,
"calibration/coverage@25%": 0.9825562169312169,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.28413915588222977,
"calibration/ece": 0.13840996405365147,
"calibration/mean_confidence": 0.6308651120728337,
"calibration/prompt_uniqueness": 0.8631049911748535,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00868055555555558,
"completions/max_length": 3621.0,
"completions/max_terminated_length": 3621.0,
"completions/mean_length": 655.096923828125,
"completions/mean_terminated_length": 660.8446044921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 188.33333333333334,
"epoch": 0.49919376007799904,
"num_tokens": 419755476.0,
"reward": 1.001288930575053,
"reward_std": 0.11777538061141968,
"rewards/accuracy_reward": 0.6956018408139547,
"rewards/brier_reward": 0.7960908611615499,
"rewards/confidence_uniqueness_reward": 0.94153360525767,
"rewards/format_reward": 0.9911747574806213,
"rewards/frontier_aurc_reward": -0.0013014324552689989,
"rewards/frontier_coverage_0": 0.00015948344177256027,
"rewards/frontier_coverage_1": 0.00015948344177256027,
"rewards/frontier_coverage_10": 0.0008107475781192383,
"rewards/frontier_coverage_15": 0.027395144725839298,
"rewards/frontier_coverage_20": 0.08610829710960388,
"rewards/frontier_coverage_25": 0.1655142605304718,
"rewards/frontier_coverage_5": 0.0002017094132800897,
"rewards/frontier_ece_reward": -0.0024814563415323696,
"rewards/frontier_entropy_batch_reward": -0.19101824859778085,
"signal/accuracy_reward/centered_abs_mean": 0.14246961971124014,
"signal/accuracy_reward/group_bin_occupancy": 0.1909722222222222,
"signal/accuracy_reward/group_std_mean": 0.1864061305920283,
"signal/accuracy_reward/group_zero_std_frac": 0.4722222089767456,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07123480985562007,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07123480985562007,
"signal/advantage_abs_mean": 0.08667557189861934,
"signal/advantage_pre_scale_abs_mean": 0.08667557189861934,
"signal/advantage_pre_scale_std": 0.1438957303762436,
"signal/advantage_std": 0.1438957303762436,
"signal/brier_reward/centered_abs_mean": 0.14015839993953705,
"signal/brier_reward/group_bin_occupancy": 0.8518518518518517,
"signal/brier_reward/group_std_mean": 0.17842622101306915,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014015840366482735,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014015840366482735,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027561215683817863,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8454861111111112,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04839188729723295,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002756121257940928,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002756121257940928,
"signal/format_reward/centered_abs_mean": 0.01621274556964636,
"signal/format_reward/group_bin_occupancy": 0.14467592592592593,
"signal/format_reward/group_std_mean": 0.034930519138773285,
"signal/format_reward/group_zero_std_frac": 0.8425925970077515,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00810637278482318,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00810637278482318,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016874617819363873,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6921296296296297,
"signal/frontier_aurc_reward/group_std_mean": 0.0032867664316048226,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1093272759268682e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1093272759268682e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19459756712118784,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8425925925925926,
"signal/frontier_coverage_0/group_std_mean": 0.24988562365372977,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024324696666250625,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024324696666250625,
"signal/frontier_coverage_1/centered_abs_mean": 0.19459756712118784,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8425925925925926,
"signal/frontier_coverage_1/group_std_mean": 0.24988562365372977,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024324696666250625,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024324696666250625,
"signal/frontier_coverage_10/centered_abs_mean": 0.19071337580680847,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8431712962962963,
"signal/frontier_coverage_10/group_std_mean": 0.2450977216164271,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002383917337283492,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002383917337283492,
"signal/frontier_coverage_15/centered_abs_mean": 0.07185898224512736,
"signal/frontier_coverage_15/group_bin_occupancy": 0.9074074074074074,
"signal/frontier_coverage_15/group_std_mean": 0.09349055091540019,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008982373401522636,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008982373401522636,
"signal/frontier_coverage_20/centered_abs_mean": 0.07390168060859044,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9201388888888888,
"signal/frontier_coverage_20/group_std_mean": 0.09523183355728786,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009237710037268698,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009237710037268698,
"signal/frontier_coverage_25/centered_abs_mean": 0.11694104224443436,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8831018518518517,
"signal/frontier_coverage_25/group_std_mean": 0.15213672816753387,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014617630513384938,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014617630513384938,
"signal/frontier_coverage_5/centered_abs_mean": 0.19448575377464294,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8425925925925926,
"signal/frontier_coverage_5/group_std_mean": 0.24974611898263296,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024310719842712083,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024310719842712083,
"signal/frontier_ece_reward/centered_abs_mean": 0.02061399631202221,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6296296296296297,
"signal/frontier_ece_reward/group_std_mean": 0.025443398704131443,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00206139978642265,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00206139978642265,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24270604054133096,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7702546296296297,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3108425835768382,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0242706040541331,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0242706040541331,
"step": 208,
"total_flos": 0.0,
"train_loss": -0.009165088099857362,
"train_runtime": 38159.999,
"train_samples_per_second": 0.393,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 208,
"num_input_tokens_seen": 419755476,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}