Files
RLCR-v4-ks-uniqueness-noece…/trainer_state.json
ModelHub XC e37f57c428 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-uniqueness-noece-noaurc-cold-math
Source: Original Platform
2026-04-23 18:57:23 +08:00

5399 lines
333 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.49919376007799904,
"eval_steps": 50,
"global_step": 208,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.49341160918807886,
"calibration/batch_distribution_entropy": 0.2677708973812775,
"calibration/confidence_entropy": 0.2110494430232559,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4607692188269585,
"calibration/mean_confidence": 0.9172499545900774,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.020659722222222232,
"completions/max_length": 4003.8,
"completions/max_terminated_length": 4003.8,
"completions/mean_length": 512.6473083496094,
"completions/mean_terminated_length": 523.4645141601562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.011999850001874977,
"grad_norm": 0.004103545099496841,
"learning_rate": 5.952380952380953e-07,
"loss": 0.0037,
"num_tokens": 9019905.0,
"reward": 0.5161375880241394,
"reward_std": 0.4549876987934113,
"rewards/accuracy_reward": 0.2603298544883728,
"rewards/brier_reward": 0.31216180324554443,
"rewards/confidence_uniqueness_reward": 0.290114027261734,
"rewards/format_reward": 0.5964409589767456,
"rewards/frontier_coverage_0": 0.27497095465660093,
"rewards/frontier_coverage_1": 0.27497095465660093,
"rewards/frontier_coverage_10": 0.27497095465660093,
"rewards/frontier_coverage_15": 0.27497095465660093,
"rewards/frontier_coverage_20": 0.27497095465660093,
"rewards/frontier_coverage_25": 0.27497095465660093,
"rewards/frontier_coverage_5": 0.27497095465660093,
"signal/accuracy_reward/centered_abs_mean": 0.30428059697151183,
"signal/accuracy_reward/group_std_mean": 0.3650037467479706,
"signal/accuracy_reward/group_zero_std_frac": 0.0916666679084301,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15214029848575591,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15214029848575591,
"signal/advantage_abs_mean": 0.3943765044212341,
"signal/advantage_pre_scale_abs_mean": 0.3943765044212341,
"signal/advantage_pre_scale_std": 0.4582944571971893,
"signal/advantage_std": 0.4582944571971893,
"signal/brier_reward/centered_abs_mean": 0.3165676236152649,
"signal/brier_reward/group_std_mean": 0.370278537273407,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03165676258504391,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.03165676258504391,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.23938581049442292,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2900544762611389,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023938581719994544,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023938581719994544,
"signal/format_reward/centered_abs_mean": 0.43942599892616274,
"signal/format_reward/group_std_mean": 0.474114316701889,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.21971299946308137,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.21971299946308137,
"signal/frontier_coverage_0/centered_abs_mean": 0.3062441885471344,
"signal/frontier_coverage_0/group_std_mean": 0.36501355171203614,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00437929192557931,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00437929192557931,
"signal/frontier_coverage_1/centered_abs_mean": 0.3062441885471344,
"signal/frontier_coverage_1/group_std_mean": 0.36501355171203614,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00437929192557931,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00437929192557931,
"signal/frontier_coverage_10/centered_abs_mean": 0.3062441885471344,
"signal/frontier_coverage_10/group_std_mean": 0.36501355171203614,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00437929192557931,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00437929192557931,
"signal/frontier_coverage_15/centered_abs_mean": 0.3062441885471344,
"signal/frontier_coverage_15/group_std_mean": 0.36501355171203614,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00437929192557931,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00437929192557931,
"signal/frontier_coverage_20/centered_abs_mean": 0.3062441885471344,
"signal/frontier_coverage_20/group_std_mean": 0.36501355171203614,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00437929192557931,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00437929192557931,
"signal/frontier_coverage_25/centered_abs_mean": 0.3062441885471344,
"signal/frontier_coverage_25/group_std_mean": 0.36501355171203614,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00437929192557931,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00437929192557931,
"signal/frontier_coverage_5/centered_abs_mean": 0.3062441885471344,
"signal/frontier_coverage_5/group_std_mean": 0.36501355171203614,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00437929192557931,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00437929192557931,
"step": 5
},
{
"calibration/aurc": 0.517125217183162,
"calibration/batch_distribution_entropy": 0.27050996094866103,
"calibration/confidence_entropy": 0.21723002440868527,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.47366067204089707,
"calibration/mean_confidence": 0.91929693281871,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017795138888888885,
"completions/max_length": 3914.2,
"completions/max_terminated_length": 3914.2,
"completions/mean_length": 473.3821228027344,
"completions/mean_terminated_length": 482.12042236328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 29.2,
"epoch": 0.023999700003749954,
"grad_norm": 0.0036878257524222136,
"learning_rate": 1.1904761904761906e-06,
"loss": 0.0018,
"num_tokens": 17555987.0,
"reward": 0.6115049719810486,
"reward_std": 0.42418900728225706,
"rewards/accuracy_reward": 0.29661458134651186,
"rewards/brier_reward": 0.3612636148929596,
"rewards/confidence_uniqueness_reward": 0.35831656455993655,
"rewards/format_reward": 0.7197048544883728,
"rewards/frontier_coverage_0": 0.3135582983493805,
"rewards/frontier_coverage_1": 0.3135582983493805,
"rewards/frontier_coverage_10": 0.3135582983493805,
"rewards/frontier_coverage_15": 0.3135582983493805,
"rewards/frontier_coverage_20": 0.3135582983493805,
"rewards/frontier_coverage_25": 0.3135582983493805,
"rewards/frontier_coverage_5": 0.3135582983493805,
"signal/accuracy_reward/centered_abs_mean": 0.3206434488296509,
"signal/accuracy_reward/group_std_mean": 0.37941792607307434,
"signal/accuracy_reward/group_zero_std_frac": 0.08055555671453477,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16032172441482545,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.16032172441482545,
"signal/advantage_abs_mean": 0.34988189339637754,
"signal/advantage_pre_scale_abs_mean": 0.34988189339637754,
"signal/advantage_pre_scale_std": 0.42775319814682006,
"signal/advantage_std": 0.42775319814682006,
"signal/brier_reward/centered_abs_mean": 0.3171759068965912,
"signal/brier_reward/group_std_mean": 0.3707857489585876,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03171758912503719,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.03171758912503719,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.22490560114383698,
"signal/confidence_uniqueness_reward/group_std_mean": 0.28029904961586,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02249056026339531,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02249056026339531,
"signal/format_reward/centered_abs_mean": 0.35055881142616274,
"signal/format_reward/group_std_mean": 0.41676204204559325,
"signal/format_reward/group_zero_std_frac": 0.00555555559694767,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.17527940571308137,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.17527940571308137,
"signal/frontier_coverage_0/centered_abs_mean": 0.3156200468540192,
"signal/frontier_coverage_0/group_std_mean": 0.3727039873600006,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004513366613537073,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004513366613537073,
"signal/frontier_coverage_1/centered_abs_mean": 0.3156200468540192,
"signal/frontier_coverage_1/group_std_mean": 0.3727039873600006,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004513366613537073,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004513366613537073,
"signal/frontier_coverage_10/centered_abs_mean": 0.3156200468540192,
"signal/frontier_coverage_10/group_std_mean": 0.3727039873600006,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004513366613537073,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004513366613537073,
"signal/frontier_coverage_15/centered_abs_mean": 0.3156200468540192,
"signal/frontier_coverage_15/group_std_mean": 0.3727039873600006,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004513366613537073,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004513366613537073,
"signal/frontier_coverage_20/centered_abs_mean": 0.3156200468540192,
"signal/frontier_coverage_20/group_std_mean": 0.3727039873600006,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004513366613537073,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004513366613537073,
"signal/frontier_coverage_25/centered_abs_mean": 0.3156200468540192,
"signal/frontier_coverage_25/group_std_mean": 0.3727039873600006,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004513366613537073,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004513366613537073,
"signal/frontier_coverage_5/centered_abs_mean": 0.3156200468540192,
"signal/frontier_coverage_5/group_std_mean": 0.3727039873600006,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004513366613537073,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004513366613537073,
"step": 10
},
{
"calibration/aurc": 0.5414595000464721,
"calibration/batch_distribution_entropy": 0.2774047038997945,
"calibration/confidence_entropy": 0.22813624148006326,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5205185786094126,
"calibration/mean_confidence": 0.9187366141594058,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012239583333333326,
"completions/max_length": 3908.2,
"completions/max_terminated_length": 3908.2,
"completions/mean_length": 412.8827331542969,
"completions/mean_terminated_length": 418.0228271484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 38.6,
"epoch": 0.03599955000562493,
"grad_norm": 0.0015186438104137778,
"learning_rate": 1.7857142857142859e-06,
"loss": -0.0113,
"num_tokens": 25414380.0,
"reward": 0.7417026281356811,
"reward_std": 0.31185888051986693,
"rewards/accuracy_reward": 0.30269097089767455,
"rewards/brier_reward": 0.4077360093593597,
"rewards/confidence_uniqueness_reward": 0.5074092388153076,
"rewards/format_reward": 0.93125,
"rewards/frontier_coverage_0": 0.33184386491775514,
"rewards/frontier_coverage_1": 0.33184386491775514,
"rewards/frontier_coverage_10": 0.33184386491775514,
"rewards/frontier_coverage_15": 0.33184386491775514,
"rewards/frontier_coverage_20": 0.33184386491775514,
"rewards/frontier_coverage_25": 0.33184386491775514,
"rewards/frontier_coverage_5": 0.33184386491775514,
"signal/accuracy_reward/centered_abs_mean": 0.31045464873313905,
"signal/accuracy_reward/group_std_mean": 0.3683877825737,
"signal/accuracy_reward/group_zero_std_frac": 0.10000000223517418,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15522732436656952,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15522732436656952,
"signal/advantage_abs_mean": 0.24663678109645842,
"signal/advantage_pre_scale_abs_mean": 0.24663678109645842,
"signal/advantage_pre_scale_std": 0.3215538918972015,
"signal/advantage_std": 0.3215538918972015,
"signal/brier_reward/centered_abs_mean": 0.29533362984657285,
"signal/brier_reward/group_std_mean": 0.34695218205451966,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029533364251255988,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.029533364251255988,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.179153373837471,
"signal/confidence_uniqueness_reward/group_std_mean": 0.22881248593330383,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.017915337532758712,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.017915337532758712,
"signal/format_reward/centered_abs_mean": 0.11593967080116271,
"signal/format_reward/group_std_mean": 0.19709881097078324,
"signal/format_reward/group_zero_std_frac": 0.280555559694767,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05796983540058136,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.05796983540058136,
"signal/frontier_coverage_0/centered_abs_mean": 0.3037257790565491,
"signal/frontier_coverage_0/group_std_mean": 0.3585019886493683,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0043432785663753744,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0043432785663753744,
"signal/frontier_coverage_1/centered_abs_mean": 0.3037257790565491,
"signal/frontier_coverage_1/group_std_mean": 0.3585019886493683,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0043432785663753744,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0043432785663753744,
"signal/frontier_coverage_10/centered_abs_mean": 0.3037257790565491,
"signal/frontier_coverage_10/group_std_mean": 0.3585019886493683,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0043432785663753744,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0043432785663753744,
"signal/frontier_coverage_15/centered_abs_mean": 0.3037257790565491,
"signal/frontier_coverage_15/group_std_mean": 0.3585019886493683,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0043432785663753744,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0043432785663753744,
"signal/frontier_coverage_20/centered_abs_mean": 0.3037257790565491,
"signal/frontier_coverage_20/group_std_mean": 0.3585019886493683,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0043432785663753744,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0043432785663753744,
"signal/frontier_coverage_25/centered_abs_mean": 0.3037257790565491,
"signal/frontier_coverage_25/group_std_mean": 0.3585019886493683,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0043432785663753744,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0043432785663753744,
"signal/frontier_coverage_5/centered_abs_mean": 0.3037257790565491,
"signal/frontier_coverage_5/group_std_mean": 0.3585019886493683,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0043432785663753744,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0043432785663753744,
"step": 15
},
{
"calibration/aurc": 0.4847516582072403,
"calibration/batch_distribution_entropy": 0.3599406528071635,
"calibration/buffer_distribution_entropy": 0.2936503471332332,
"calibration/confidence_entropy": 0.29304720208371154,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4180637786651644,
"calibration/mean_confidence": 0.8936602458423843,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010763888888888884,
"completions/max_length": 3824.4,
"completions/max_terminated_length": 3824.4,
"completions/mean_length": 432.4386352539062,
"completions/mean_terminated_length": 437.1669494628906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 83.0,
"epoch": 0.04799940000749991,
"grad_norm": 0.0023721419274806976,
"learning_rate": 2.380952380952381e-06,
"loss": -0.0075,
"num_tokens": 33509769.0,
"reward": 0.8219772577285767,
"reward_std": 0.23951346278190613,
"rewards/accuracy_reward": 0.40755208730697634,
"rewards/brier_reward": 0.5194338917732239,
"rewards/confidence_uniqueness_reward": 0.5695141077041626,
"rewards/format_reward": 0.9841145753860474,
"rewards/frontier_coverage_0": 0.17231892738491297,
"rewards/frontier_coverage_1": 0.17231892738491297,
"rewards/frontier_coverage_10": 0.17231892738491297,
"rewards/frontier_coverage_15": 0.17231892738491297,
"rewards/frontier_coverage_20": 0.17231892738491297,
"rewards/frontier_coverage_25": 0.17231892738491297,
"rewards/frontier_coverage_5": 0.17231892738491297,
"signal/accuracy_reward/centered_abs_mean": 0.28974066972732543,
"signal/accuracy_reward/group_std_mean": 0.3587852954864502,
"signal/accuracy_reward/group_zero_std_frac": 0.07777778059244156,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14487033486366271,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14487033486366271,
"signal/advantage_abs_mean": 0.1873514622449875,
"signal/advantage_pre_scale_abs_mean": 0.1873514622449875,
"signal/advantage_pre_scale_std": 0.24830279350280762,
"signal/advantage_std": 0.24830279350280762,
"signal/brier_reward/centered_abs_mean": 0.2584921300411224,
"signal/brier_reward/group_std_mean": 0.31671356558799746,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025849214196205138,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.025849214196205138,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.17959446012973784,
"signal/confidence_uniqueness_reward/group_std_mean": 0.21536695957183838,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01795944608747959,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01795944608747959,
"signal/format_reward/centered_abs_mean": 0.02878146693110466,
"signal/format_reward/group_std_mean": 0.059326070547103885,
"signal/format_reward/group_zero_std_frac": 0.7416666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01439073346555233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01439073346555233,
"signal/frontier_coverage_0/centered_abs_mean": 0.12399653047323227,
"signal/frontier_coverage_0/group_std_mean": 0.16216810792684555,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017731502826791256,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017731502826791256,
"signal/frontier_coverage_1/centered_abs_mean": 0.12399653047323227,
"signal/frontier_coverage_1/group_std_mean": 0.16216810792684555,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017731502826791256,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017731502826791256,
"signal/frontier_coverage_10/centered_abs_mean": 0.12399653047323227,
"signal/frontier_coverage_10/group_std_mean": 0.16216810792684555,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017731502826791256,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017731502826791256,
"signal/frontier_coverage_15/centered_abs_mean": 0.12399653047323227,
"signal/frontier_coverage_15/group_std_mean": 0.16216810792684555,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017731502826791256,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017731502826791256,
"signal/frontier_coverage_20/centered_abs_mean": 0.12399653047323227,
"signal/frontier_coverage_20/group_std_mean": 0.16216810792684555,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017731502826791256,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017731502826791256,
"signal/frontier_coverage_25/centered_abs_mean": 0.12399653047323227,
"signal/frontier_coverage_25/group_std_mean": 0.16216810792684555,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017731502826791256,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017731502826791256,
"signal/frontier_coverage_5/centered_abs_mean": 0.12399653047323227,
"signal/frontier_coverage_5/group_std_mean": 0.16216810792684555,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017731502826791256,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017731502826791256,
"step": 20
},
{
"calibration/aurc": 0.38634314240054995,
"calibration/batch_distribution_entropy": 0.46526513959059035,
"calibration/buffer_distribution_entropy": 0.32973422005179354,
"calibration/confidence_entropy": 0.3478899647822179,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.00783289817232376,
"calibration/coverage@25%": 0.13632165978831523,
"calibration/coverage@30%": 0.225260832701648,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.308634197428995,
"calibration/mean_confidence": 0.8684335224123025,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009722222222222233,
"completions/max_length": 3845.2,
"completions/max_terminated_length": 3845.2,
"completions/mean_length": 477.2792663574219,
"completions/mean_terminated_length": 481.96564331054685,
"completions/min_length": 0.0,
"completions/min_terminated_length": 96.4,
"epoch": 0.05999925000937488,
"grad_norm": 0.0009343558340333402,
"learning_rate": 2.9761904761904763e-06,
"loss": -0.0071,
"num_tokens": 42132474.0,
"reward": 0.8692076325416564,
"reward_std": 0.21406486630439758,
"rewards/accuracy_reward": 0.5006076395511627,
"rewards/brier_reward": 0.6141545534133911,
"rewards/confidence_uniqueness_reward": 0.6311920523643494,
"rewards/format_reward": 0.9878472208976745,
"rewards/frontier_coverage_0": 0.0044506344594992696,
"rewards/frontier_coverage_1": 0.0044506344594992696,
"rewards/frontier_coverage_10": 0.0044506344594992696,
"rewards/frontier_coverage_15": 0.0044506344594992696,
"rewards/frontier_coverage_20": 0.0044506344594992696,
"rewards/frontier_coverage_25": 0.0044506344594992696,
"rewards/frontier_coverage_5": 0.0044506344594992696,
"signal/accuracy_reward/centered_abs_mean": 0.28586697578430176,
"signal/accuracy_reward/group_std_mean": 0.35248740911483767,
"signal/accuracy_reward/group_zero_std_frac": 0.09444444701075554,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14293348789215088,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14293348789215088,
"signal/advantage_abs_mean": 0.16929234862327575,
"signal/advantage_pre_scale_abs_mean": 0.16929234862327575,
"signal/advantage_pre_scale_std": 0.22722831070423127,
"signal/advantage_std": 0.22722831070423127,
"signal/brier_reward/centered_abs_mean": 0.2289237290620804,
"signal/brier_reward/group_std_mean": 0.2827379047870636,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02289237417280674,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02289237417280674,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1367410659790039,
"signal/confidence_uniqueness_reward/group_std_mean": 0.16966789066791535,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013674106262624264,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013674106262624264,
"signal/format_reward/centered_abs_mean": 0.021506076492369176,
"signal/format_reward/group_std_mean": 0.04259942732751369,
"signal/format_reward/group_zero_std_frac": 0.8194444656372071,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010753038246184588,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010753038246184588,
"signal/frontier_coverage_0/centered_abs_mean": 0.02831830345094204,
"signal/frontier_coverage_0/group_std_mean": 0.0470881313085556,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0004049517388921231,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0004049517388921231,
"signal/frontier_coverage_1/centered_abs_mean": 0.02831830345094204,
"signal/frontier_coverage_1/group_std_mean": 0.0470881313085556,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0004049517388921231,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0004049517388921231,
"signal/frontier_coverage_10/centered_abs_mean": 0.02831830345094204,
"signal/frontier_coverage_10/group_std_mean": 0.0470881313085556,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0004049517388921231,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0004049517388921231,
"signal/frontier_coverage_15/centered_abs_mean": 0.02831830345094204,
"signal/frontier_coverage_15/group_std_mean": 0.0470881313085556,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0004049517388921231,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0004049517388921231,
"signal/frontier_coverage_20/centered_abs_mean": 0.02831830345094204,
"signal/frontier_coverage_20/group_std_mean": 0.0470881313085556,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0004049517388921231,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0004049517388921231,
"signal/frontier_coverage_25/centered_abs_mean": 0.02831830345094204,
"signal/frontier_coverage_25/group_std_mean": 0.0470881313085556,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0004049517388921231,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0004049517388921231,
"signal/frontier_coverage_5/centered_abs_mean": 0.02831830345094204,
"signal/frontier_coverage_5/group_std_mean": 0.0470881313085556,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0004049517388921231,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0004049517388921231,
"step": 25
},
{
"calibration/aurc": 0.31364832770276674,
"calibration/batch_distribution_entropy": 0.499710605804783,
"calibration/buffer_distribution_entropy": 0.38684449581318747,
"calibration/confidence_entropy": 0.422717480258496,
"calibration/coverage@0%": 0.010080906132506668,
"calibration/coverage@1%": 0.010080906132506668,
"calibration/coverage@10%": 0.010080906132506668,
"calibration/coverage@15%": 0.039589102853818135,
"calibration/coverage@20%": 0.039589102853818135,
"calibration/coverage@25%": 0.189862326897534,
"calibration/coverage@30%": 0.4886187270899134,
"calibration/coverage@5%": 0.010080906132506668,
"calibration/ece": 0.18590618469068998,
"calibration/mean_confidence": 0.83246762819597,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01180555555555558,
"completions/max_length": 4029.6,
"completions/max_terminated_length": 4029.6,
"completions/mean_length": 553.0240539550781,
"completions/mean_terminated_length": 559.6217895507813,
"completions/min_length": 0.0,
"completions/min_terminated_length": 122.6,
"epoch": 0.07199910001124986,
"grad_norm": 0.000492545310407877,
"learning_rate": 3.5714285714285718e-06,
"loss": -0.0069,
"num_tokens": 51613231.0,
"reward": 0.9050763845443726,
"reward_std": 0.19376931786537172,
"rewards/accuracy_reward": 0.5701388835906982,
"rewards/brier_reward": 0.6859318375587463,
"rewards/confidence_uniqueness_reward": 0.5833416938781738,
"rewards/format_reward": 0.9861979246139526,
"rewards/frontier_coverage_0": -0.00019347216002643108,
"rewards/frontier_coverage_1": -0.00019347216002643108,
"rewards/frontier_coverage_10": -0.00019347216002643108,
"rewards/frontier_coverage_15": -0.00019347216002643108,
"rewards/frontier_coverage_20": -0.00019347216002643108,
"rewards/frontier_coverage_25": -0.00019347216002643108,
"rewards/frontier_coverage_5": -0.00019347216002643108,
"signal/accuracy_reward/centered_abs_mean": 0.2508246570825577,
"signal/accuracy_reward/group_std_mean": 0.31349337100982666,
"signal/accuracy_reward/group_zero_std_frac": 0.1694444462656975,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12541232854127884,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.12541232854127884,
"signal/advantage_abs_mean": 0.15153846591711045,
"signal/advantage_pre_scale_abs_mean": 0.15153846591711045,
"signal/advantage_pre_scale_std": 0.2147096276283264,
"signal/advantage_std": 0.2147096276283264,
"signal/brier_reward/centered_abs_mean": 0.18513798117637634,
"signal/brier_reward/group_std_mean": 0.23217344880104065,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018513799458742142,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.018513799458742142,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.20066949725151062,
"signal/confidence_uniqueness_reward/group_std_mean": 0.23112341463565828,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02006694972515106,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02006694972515106,
"signal/format_reward/centered_abs_mean": 0.02318250872194767,
"signal/format_reward/group_std_mean": 0.04461696371436119,
"signal/format_reward/group_zero_std_frac": 0.8111111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011591254360973835,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011591254360973835,
"signal/frontier_coverage_0/centered_abs_mean": 0.03151394948363304,
"signal/frontier_coverage_0/group_std_mean": 0.04832508638501167,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0004506495199166238,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0004506495199166238,
"signal/frontier_coverage_1/centered_abs_mean": 0.03151394948363304,
"signal/frontier_coverage_1/group_std_mean": 0.04832508638501167,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0004506495199166238,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0004506495199166238,
"signal/frontier_coverage_10/centered_abs_mean": 0.03151394948363304,
"signal/frontier_coverage_10/group_std_mean": 0.04832508638501167,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0004506495199166238,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0004506495199166238,
"signal/frontier_coverage_15/centered_abs_mean": 0.03151394948363304,
"signal/frontier_coverage_15/group_std_mean": 0.04832508638501167,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0004506495199166238,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0004506495199166238,
"signal/frontier_coverage_20/centered_abs_mean": 0.03151394948363304,
"signal/frontier_coverage_20/group_std_mean": 0.04832508638501167,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0004506495199166238,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0004506495199166238,
"signal/frontier_coverage_25/centered_abs_mean": 0.03151394948363304,
"signal/frontier_coverage_25/group_std_mean": 0.04832508638501167,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0004506495199166238,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0004506495199166238,
"signal/frontier_coverage_5/centered_abs_mean": 0.03151394948363304,
"signal/frontier_coverage_5/group_std_mean": 0.04832508638501167,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0004506495199166238,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0004506495199166238,
"step": 30
},
{
"calibration/aurc": 0.2681236379901537,
"calibration/batch_distribution_entropy": 0.6073690581770046,
"calibration/buffer_distribution_entropy": 0.4477840560637281,
"calibration/confidence_entropy": 0.4926214392193963,
"calibration/coverage@0%": 0.005292996092884997,
"calibration/coverage@1%": 0.005292996092884997,
"calibration/coverage@10%": 0.01880650960639851,
"calibration/coverage@15%": 0.06538064730050323,
"calibration/coverage@20%": 0.12169978127767771,
"calibration/coverage@25%": 0.48489876034564067,
"calibration/coverage@30%": 0.8208872967759714,
"calibration/coverage@5%": 0.005292996092884997,
"calibration/ece": 0.10877696174454979,
"calibration/mean_confidence": 0.7835651647507251,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 4059.6,
"completions/max_terminated_length": 4059.6,
"completions/mean_length": 624.3829956054688,
"completions/mean_terminated_length": 634.435595703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 166.4,
"epoch": 0.08399895001312484,
"grad_norm": 0.0005502038984559476,
"learning_rate": 4.166666666666667e-06,
"loss": -0.0098,
"num_tokens": 61883563.0,
"reward": 0.9427950978279114,
"reward_std": 0.16786060631275176,
"rewards/accuracy_reward": 0.6350694537162781,
"rewards/brier_reward": 0.7387005448341369,
"rewards/confidence_uniqueness_reward": 0.6191936731338501,
"rewards/format_reward": 0.9811631917953492,
"rewards/frontier_coverage_0": -0.011095415393356234,
"rewards/frontier_coverage_1": -0.011095415393356234,
"rewards/frontier_coverage_10": -0.011095415393356234,
"rewards/frontier_coverage_15": -0.011095415393356234,
"rewards/frontier_coverage_20": -0.011095415393356234,
"rewards/frontier_coverage_25": -0.011095415393356234,
"rewards/frontier_coverage_5": -0.011095415393356234,
"signal/accuracy_reward/centered_abs_mean": 0.20149739682674409,
"signal/accuracy_reward/group_std_mean": 0.260405895113945,
"signal/accuracy_reward/group_zero_std_frac": 0.2833333373069763,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10074869841337204,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10074869841337204,
"signal/advantage_abs_mean": 0.1255136936903,
"signal/advantage_pre_scale_abs_mean": 0.1255136936903,
"signal/advantage_pre_scale_std": 0.1959227830171585,
"signal/advantage_std": 0.1959227830171585,
"signal/brier_reward/centered_abs_mean": 0.1406739756464958,
"signal/brier_reward/group_std_mean": 0.1840929538011551,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014067397452890873,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014067397452890873,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.14535255879163742,
"signal/confidence_uniqueness_reward/group_std_mean": 0.17814411520957946,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01453525610268116,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01453525610268116,
"signal/format_reward/centered_abs_mean": 0.02883572019636631,
"signal/format_reward/group_std_mean": 0.05311768278479576,
"signal/format_reward/group_zero_std_frac": 0.7833333492279053,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014417860098183155,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.014417860098183155,
"signal/frontier_coverage_0/centered_abs_mean": 0.04025139883160591,
"signal/frontier_coverage_0/group_std_mean": 0.0570778988301754,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0005755949881859123,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0005755949881859123,
"signal/frontier_coverage_1/centered_abs_mean": 0.04025139883160591,
"signal/frontier_coverage_1/group_std_mean": 0.0570778988301754,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0005755949881859123,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0005755949881859123,
"signal/frontier_coverage_10/centered_abs_mean": 0.04025139883160591,
"signal/frontier_coverage_10/group_std_mean": 0.0570778988301754,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0005755949881859123,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0005755949881859123,
"signal/frontier_coverage_15/centered_abs_mean": 0.04025139883160591,
"signal/frontier_coverage_15/group_std_mean": 0.0570778988301754,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0005755949881859123,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0005755949881859123,
"signal/frontier_coverage_20/centered_abs_mean": 0.04025139883160591,
"signal/frontier_coverage_20/group_std_mean": 0.0570778988301754,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005755949881859123,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005755949881859123,
"signal/frontier_coverage_25/centered_abs_mean": 0.04025139883160591,
"signal/frontier_coverage_25/group_std_mean": 0.0570778988301754,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005755949881859123,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005755949881859123,
"signal/frontier_coverage_5/centered_abs_mean": 0.04025139883160591,
"signal/frontier_coverage_5/group_std_mean": 0.0570778988301754,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0005755949881859123,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0005755949881859123,
"step": 35
},
{
"calibration/aurc": 0.30111796135846025,
"calibration/batch_distribution_entropy": 0.7053583205644779,
"calibration/buffer_distribution_entropy": 0.5234970614904426,
"calibration/confidence_entropy": 0.5460232422366482,
"calibration/coverage@0%": 0.024122536032217527,
"calibration/coverage@1%": 0.024122536032217527,
"calibration/coverage@10%": 0.028311017707610197,
"calibration/coverage@15%": 0.14754387433618138,
"calibration/coverage@20%": 0.1898415244667297,
"calibration/coverage@25%": 0.2871458225715403,
"calibration/coverage@30%": 0.4155797006469385,
"calibration/coverage@5%": 0.028311017707610197,
"calibration/ece": 0.10965484584656611,
"calibration/mean_confidence": 0.7238385058092655,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014149305555555557,
"completions/max_length": 3896.8,
"completions/max_terminated_length": 3896.8,
"completions/mean_length": 662.5091186523438,
"completions/mean_terminated_length": 671.992822265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 183.0,
"epoch": 0.09599880001499982,
"grad_norm": 0.00039204536005854607,
"learning_rate": 4.761904761904762e-06,
"loss": -0.0107,
"num_tokens": 72635188.0,
"reward": 0.9650038480758667,
"reward_std": 0.15621234178543092,
"rewards/accuracy_reward": 0.6572048544883728,
"rewards/brier_reward": 0.7644977688789367,
"rewards/confidence_uniqueness_reward": 0.6965551018714905,
"rewards/format_reward": 0.9844617962837219,
"rewards/frontier_coverage_0": -0.01932877181097865,
"rewards/frontier_coverage_1": -0.01932877181097865,
"rewards/frontier_coverage_10": -0.01932877181097865,
"rewards/frontier_coverage_15": -0.01932877181097865,
"rewards/frontier_coverage_20": -0.01932877181097865,
"rewards/frontier_coverage_25": -0.01932877181097865,
"rewards/frontier_coverage_5": -0.01932877181097865,
"signal/accuracy_reward/centered_abs_mean": 0.18748372197151184,
"signal/accuracy_reward/group_std_mean": 0.24760319292545319,
"signal/accuracy_reward/group_zero_std_frac": 0.3000000029802322,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09374186098575592,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09374186098575592,
"signal/advantage_abs_mean": 0.11463638693094254,
"signal/advantage_pre_scale_abs_mean": 0.11463638693094254,
"signal/advantage_pre_scale_std": 0.18725805282592772,
"signal/advantage_std": 0.18725805282592772,
"signal/brier_reward/centered_abs_mean": 0.1293620839715004,
"signal/brier_reward/group_std_mean": 0.16848357319831847,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012936208583414554,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012936208583414554,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.14036289900541304,
"signal/confidence_uniqueness_reward/group_std_mean": 0.16973767578601837,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014036289602518081,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014036289602518081,
"signal/format_reward/centered_abs_mean": 0.02676323764026165,
"signal/format_reward/group_std_mean": 0.050653649121522905,
"signal/format_reward/group_zero_std_frac": 0.7916666865348816,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013381618820130826,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013381618820130826,
"signal/frontier_coverage_0/centered_abs_mean": 0.07164318114519119,
"signal/frontier_coverage_0/group_std_mean": 0.1008858099579811,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0010244975332170726,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0010244975332170726,
"signal/frontier_coverage_1/centered_abs_mean": 0.07164318114519119,
"signal/frontier_coverage_1/group_std_mean": 0.1008858099579811,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0010244975332170726,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0010244975332170726,
"signal/frontier_coverage_10/centered_abs_mean": 0.07164318114519119,
"signal/frontier_coverage_10/group_std_mean": 0.1008858099579811,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0010244975332170726,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010244975332170726,
"signal/frontier_coverage_15/centered_abs_mean": 0.07164318114519119,
"signal/frontier_coverage_15/group_std_mean": 0.1008858099579811,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010244975332170726,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010244975332170726,
"signal/frontier_coverage_20/centered_abs_mean": 0.07164318114519119,
"signal/frontier_coverage_20/group_std_mean": 0.1008858099579811,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010244975332170726,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010244975332170726,
"signal/frontier_coverage_25/centered_abs_mean": 0.07164318114519119,
"signal/frontier_coverage_25/group_std_mean": 0.1008858099579811,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010244975332170726,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010244975332170726,
"signal/frontier_coverage_5/centered_abs_mean": 0.07164318114519119,
"signal/frontier_coverage_5/group_std_mean": 0.1008858099579811,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0010244975332170726,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0010244975332170726,
"step": 40
},
{
"calibration/aurc": 0.20395504840548478,
"calibration/batch_distribution_entropy": 0.8372735413533098,
"calibration/buffer_distribution_entropy": 0.5938288992808299,
"calibration/confidence_entropy": 0.5149116414916268,
"calibration/coverage@0%": 0.01894778067656185,
"calibration/coverage@1%": 0.01894778067656185,
"calibration/coverage@10%": 0.09860851646372701,
"calibration/coverage@15%": 0.11114115353944505,
"calibration/coverage@20%": 0.4856548179675456,
"calibration/coverage@25%": 0.8623658713733823,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.030958224540791612,
"calibration/ece": 0.12145240512186213,
"calibration/mean_confidence": 0.686226908594128,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01293402777777779,
"completions/max_length": 3836.2,
"completions/max_terminated_length": 3836.2,
"completions/mean_length": 695.34775390625,
"completions/mean_terminated_length": 704.525,
"completions/min_length": 0.0,
"completions/min_terminated_length": 202.8,
"epoch": 0.1079986500168748,
"grad_norm": 0.0004304039175622165,
"learning_rate": 4.909638554216868e-06,
"loss": -0.0104,
"num_tokens": 83780858.0,
"reward": 0.9816272854804993,
"reward_std": 0.1476329445838928,
"rewards/accuracy_reward": 0.6620659947395324,
"rewards/brier_reward": 0.7589836120605469,
"rewards/confidence_uniqueness_reward": 0.8408130168914795,
"rewards/format_reward": 0.9857638835906982,
"rewards/frontier_coverage_0": -0.022650658898055554,
"rewards/frontier_coverage_1": -0.022650658898055554,
"rewards/frontier_coverage_10": -0.022650658898055554,
"rewards/frontier_coverage_15": -0.022650658898055554,
"rewards/frontier_coverage_20": -0.022650658898055554,
"rewards/frontier_coverage_25": -0.022650658898055554,
"rewards/frontier_coverage_5": -0.022650658898055554,
"signal/accuracy_reward/centered_abs_mean": 0.18065863847732544,
"signal/accuracy_reward/group_std_mean": 0.23835379481315613,
"signal/accuracy_reward/group_zero_std_frac": 0.3194444507360458,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09032931923866272,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09032931923866272,
"signal/advantage_abs_mean": 0.10960723757743836,
"signal/advantage_pre_scale_abs_mean": 0.10960723757743836,
"signal/advantage_pre_scale_std": 0.17717336416244506,
"signal/advantage_std": 0.17717336416244506,
"signal/brier_reward/centered_abs_mean": 0.15332198441028594,
"signal/brier_reward/group_std_mean": 0.19752687215805054,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015332199074327946,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015332199074327946,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09375370144844056,
"signal/confidence_uniqueness_reward/group_std_mean": 0.11937893778085709,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009375370014458895,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009375370014458895,
"signal/format_reward/centered_abs_mean": 0.02317708320915699,
"signal/format_reward/group_std_mean": 0.040333667397499086,
"signal/format_reward/group_zero_std_frac": 0.8444444417953492,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011588541604578496,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011588541604578496,
"signal/frontier_coverage_0/centered_abs_mean": 0.11648316830396652,
"signal/frontier_coverage_0/group_std_mean": 0.1628135621547699,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016657092841342092,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016657092841342092,
"signal/frontier_coverage_1/centered_abs_mean": 0.11648316830396652,
"signal/frontier_coverage_1/group_std_mean": 0.1628135621547699,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016657092841342092,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016657092841342092,
"signal/frontier_coverage_10/centered_abs_mean": 0.11648316830396652,
"signal/frontier_coverage_10/group_std_mean": 0.1628135621547699,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016657092841342092,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016657092841342092,
"signal/frontier_coverage_15/centered_abs_mean": 0.11648316830396652,
"signal/frontier_coverage_15/group_std_mean": 0.1628135621547699,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016657092841342092,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016657092841342092,
"signal/frontier_coverage_20/centered_abs_mean": 0.11648316830396652,
"signal/frontier_coverage_20/group_std_mean": 0.1628135621547699,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016657092841342092,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016657092841342092,
"signal/frontier_coverage_25/centered_abs_mean": 0.11648316830396652,
"signal/frontier_coverage_25/group_std_mean": 0.1628135621547699,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016657092841342092,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016657092841342092,
"signal/frontier_coverage_5/centered_abs_mean": 0.11648316830396652,
"signal/frontier_coverage_5/group_std_mean": 0.1628135621547699,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016657092841342092,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016657092841342092,
"step": 45
},
{
"calibration/aurc": 0.40004916389399164,
"calibration/batch_distribution_entropy": 0.8503065773009097,
"calibration/buffer_distribution_entropy": 0.6432089511173741,
"calibration/confidence_entropy": 0.4667162645346662,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.015384615384615385,
"calibration/coverage@20%": 0.019628647214854113,
"calibration/coverage@25%": 0.08075988115602772,
"calibration/coverage@30%": 0.3064657805788464,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.24841259649370925,
"calibration/mean_confidence": 0.7116104147978586,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00972222222222221,
"completions/max_length": 3621.4,
"completions/max_terminated_length": 3621.4,
"completions/mean_length": 705.259033203125,
"completions/mean_terminated_length": 712.1700073242188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 183.4,
"epoch": 0.11999850001874976,
"grad_norm": 0.000493047118652612,
"learning_rate": 4.759036144578314e-06,
"loss": -0.0076,
"num_tokens": 95003042.0,
"reward": 0.979445469379425,
"reward_std": 0.14116989970207214,
"rewards/accuracy_reward": 0.6446180582046509,
"rewards/brier_reward": 0.7461867094039917,
"rewards/confidence_uniqueness_reward": 0.8847499370574952,
"rewards/format_reward": 0.98984375,
"rewards/frontier_coverage_0": -0.008782240888103842,
"rewards/frontier_coverage_1": -0.008782240888103842,
"rewards/frontier_coverage_10": -0.008782240888103842,
"rewards/frontier_coverage_15": -0.008782240888103842,
"rewards/frontier_coverage_20": -0.008782240888103842,
"rewards/frontier_coverage_25": -0.008782240888103842,
"rewards/frontier_coverage_5": -0.008782240888103842,
"signal/accuracy_reward/centered_abs_mean": 0.1734266459941864,
"signal/accuracy_reward/group_std_mean": 0.22699449956417084,
"signal/accuracy_reward/group_zero_std_frac": 0.3638888895511627,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0867133229970932,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0867133229970932,
"signal/advantage_abs_mean": 0.10538902878761292,
"signal/advantage_pre_scale_abs_mean": 0.10538902878761292,
"signal/advantage_pre_scale_std": 0.17160050570964813,
"signal/advantage_std": 0.17160050570964813,
"signal/brier_reward/centered_abs_mean": 0.17446674704551696,
"signal/brier_reward/group_std_mean": 0.22122922539710999,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017446675151586533,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.017446675151586533,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07939892560243607,
"signal/confidence_uniqueness_reward/group_std_mean": 0.10281916856765747,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007939892914146185,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007939892914146185,
"signal/format_reward/centered_abs_mean": 0.017041015811264514,
"signal/format_reward/group_std_mean": 0.03274031579494476,
"signal/format_reward/group_zero_std_frac": 0.8611111164093017,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008520507905632257,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008520507905632257,
"signal/frontier_coverage_0/centered_abs_mean": 0.1188867524266243,
"signal/frontier_coverage_0/group_std_mean": 0.17139540314674379,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017000806052237748,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017000806052237748,
"signal/frontier_coverage_1/centered_abs_mean": 0.1188867524266243,
"signal/frontier_coverage_1/group_std_mean": 0.17139540314674379,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017000806052237748,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017000806052237748,
"signal/frontier_coverage_10/centered_abs_mean": 0.1188867524266243,
"signal/frontier_coverage_10/group_std_mean": 0.17139540314674379,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017000806052237748,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017000806052237748,
"signal/frontier_coverage_15/centered_abs_mean": 0.1188867524266243,
"signal/frontier_coverage_15/group_std_mean": 0.17139540314674379,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017000806052237748,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017000806052237748,
"signal/frontier_coverage_20/centered_abs_mean": 0.1188867524266243,
"signal/frontier_coverage_20/group_std_mean": 0.17139540314674379,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017000806052237748,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017000806052237748,
"signal/frontier_coverage_25/centered_abs_mean": 0.1188867524266243,
"signal/frontier_coverage_25/group_std_mean": 0.17139540314674379,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017000806052237748,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017000806052237748,
"signal/frontier_coverage_5/centered_abs_mean": 0.1188867524266243,
"signal/frontier_coverage_5/group_std_mean": 0.17139540314674379,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017000806052237748,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017000806052237748,
"step": 50
},
{
"epoch": 0.11999850001874976,
"eval_calibration/aurc": 0.27069268800775687,
"eval_calibration/batch_distribution_entropy": 0.7597488500774766,
"eval_calibration/buffer_distribution_entropy": 0.665248508467892,
"eval_calibration/confidence_entropy": 0.41376028637381856,
"eval_calibration/coverage@0%": 0.057291666666666664,
"eval_calibration/coverage@1%": 0.057291666666666664,
"eval_calibration/coverage@10%": 0.057291666666666664,
"eval_calibration/coverage@15%": 0.15625,
"eval_calibration/coverage@20%": 0.3385416666666667,
"eval_calibration/coverage@25%": 0.5,
"eval_calibration/coverage@30%": 0.7708333333333334,
"eval_calibration/coverage@5%": 0.057291666666666664,
"eval_calibration/ece": 0.23475069231304369,
"eval_calibration/mean_confidence": 0.7354858947273533,
"eval_completions/clipped_ratio": 0.006076388888888895,
"eval_completions/max_length": 2445.5,
"eval_completions/max_terminated_length": 2445.5,
"eval_completions/mean_length": 694.704345703125,
"eval_completions/mean_terminated_length": 699.0004577636719,
"eval_completions/min_length": 85.33333333333333,
"eval_completions/min_terminated_length": 241.0,
"eval_loss": 0.0,
"eval_num_tokens": 95003042.0,
"eval_reward": 0.9741584062576294,
"eval_reward_std": 0.2603639264901479,
"eval_rewards/accuracy_reward": 0.6388889054457346,
"eval_rewards/brier_reward": 0.7358029286066691,
"eval_rewards/confidence_uniqueness_reward": 0.8596515456835429,
"eval_rewards/format_reward": 0.9930555621782938,
"eval_rewards/frontier_coverage_0": -0.013578996993601322,
"eval_rewards/frontier_coverage_1": -0.013578996993601322,
"eval_rewards/frontier_coverage_10": -0.013578996993601322,
"eval_rewards/frontier_coverage_15": -0.013578996993601322,
"eval_rewards/frontier_coverage_20": -0.013578996993601322,
"eval_rewards/frontier_coverage_25": -0.013578996993601322,
"eval_rewards/frontier_coverage_5": -0.013578996993601322,
"eval_runtime": 203.1998,
"eval_samples_per_second": 4.921,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4524739583333333,
"eval_signal/accuracy_reward/group_std_mean": 0.4828086843093236,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22623697916666666,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22623697916666666,
"eval_signal/advantage_abs_mean": 0.23652214308579764,
"eval_signal/advantage_pre_scale_abs_mean": 0.23652214308579764,
"eval_signal/advantage_pre_scale_std": 0.2581101755301158,
"eval_signal/advantage_std": 0.2581101755301158,
"eval_signal/brier_reward/centered_abs_mean": 0.24942312637964884,
"eval_signal/brier_reward/group_std_mean": 0.3024876117706299,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024942313010493915,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.024942313010493915,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06988021731376648,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09675693760315578,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00698802216599385,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00698802216599385,
"eval_signal/format_reward/centered_abs_mean": 0.013454860852410397,
"eval_signal/format_reward/group_std_mean": 0.03928370991100868,
"eval_signal/format_reward/group_zero_std_frac": 0.7777778009573618,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.006727430426205198,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.006727430426205198,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.14178907995422682,
"eval_signal/frontier_coverage_0/group_std_mean": 0.22794127960999808,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002027583793581774,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002027583793581774,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.14178907995422682,
"eval_signal/frontier_coverage_1/group_std_mean": 0.22794127960999808,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002027583793581774,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002027583793581774,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.14178907995422682,
"eval_signal/frontier_coverage_10/group_std_mean": 0.22794127960999808,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002027583793581774,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002027583793581774,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.14178907995422682,
"eval_signal/frontier_coverage_15/group_std_mean": 0.22794127960999808,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002027583793581774,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002027583793581774,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.14178907995422682,
"eval_signal/frontier_coverage_20/group_std_mean": 0.22794127960999808,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002027583793581774,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002027583793581774,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.14178907995422682,
"eval_signal/frontier_coverage_25/group_std_mean": 0.22794127960999808,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002027583793581774,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002027583793581774,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.14178907995422682,
"eval_signal/frontier_coverage_5/group_std_mean": 0.22794127960999808,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002027583793581774,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002027583793581774,
"eval_steps_per_second": 0.03,
"step": 50
},
{
"calibration/aurc": 0.2623736134960931,
"calibration/batch_distribution_entropy": 0.8514919861126554,
"calibration/buffer_distribution_entropy": 0.6764847742047964,
"calibration/confidence_entropy": 0.44676430730968664,
"calibration/coverage@0%": 0.002617801047120419,
"calibration/coverage@1%": 0.002617801047120419,
"calibration/coverage@10%": 0.13036649214659687,
"calibration/coverage@15%": 0.22303664921465968,
"calibration/coverage@20%": 0.2418848167539267,
"calibration/coverage@25%": 0.5089005235602094,
"calibration/coverage@30%": 0.737714654822215,
"calibration/coverage@5%": 0.038743455497382194,
"calibration/ece": 0.15346165735644177,
"calibration/mean_confidence": 0.7219380418125023,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012152777777777768,
"completions/max_length": 3167.8,
"completions/max_terminated_length": 3167.8,
"completions/mean_length": 718.0534912109375,
"completions/mean_terminated_length": 726.9828491210938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 155.2,
"epoch": 0.13199835002062474,
"grad_norm": 0.00041744497139006853,
"learning_rate": 4.60843373493976e-06,
"loss": -0.0089,
"num_tokens": 106355594.0,
"reward": 0.9921215295791626,
"reward_std": 0.13974846601486207,
"rewards/accuracy_reward": 0.66328125,
"rewards/brier_reward": 0.7599790453910827,
"rewards/confidence_uniqueness_reward": 0.9193215608596802,
"rewards/format_reward": 0.9871527791023255,
"rewards/frontier_coverage_0": -0.010245506907813251,
"rewards/frontier_coverage_1": -0.010245506907813251,
"rewards/frontier_coverage_10": -0.010245506907813251,
"rewards/frontier_coverage_15": -0.010245506907813251,
"rewards/frontier_coverage_20": -0.010245506907813251,
"rewards/frontier_coverage_25": -0.010245506907813251,
"rewards/frontier_coverage_5": -0.010245506907813251,
"signal/accuracy_reward/centered_abs_mean": 0.17350803017616273,
"signal/accuracy_reward/group_std_mean": 0.22752963304519652,
"signal/accuracy_reward/group_zero_std_frac": 0.3583333373069763,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08675401508808137,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08675401508808137,
"signal/advantage_abs_mean": 0.10408687144517899,
"signal/advantage_pre_scale_abs_mean": 0.10408687144517899,
"signal/advantage_pre_scale_std": 0.17095798552036284,
"signal/advantage_std": 0.17095798552036284,
"signal/brier_reward/centered_abs_mean": 0.16589346528053284,
"signal/brier_reward/group_std_mean": 0.2125259518623352,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016589346528053283,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016589346528053283,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04859142899513245,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07001910582184792,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004859142657369375,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004859142657369375,
"signal/format_reward/centered_abs_mean": 0.01928168386220932,
"signal/format_reward/group_std_mean": 0.03471194803714752,
"signal/format_reward/group_zero_std_frac": 0.8638888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00964084193110466,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00964084193110466,
"signal/frontier_coverage_0/centered_abs_mean": 0.13067235350608825,
"signal/frontier_coverage_0/group_std_mean": 0.1851820766925812,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018686146708205343,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018686146708205343,
"signal/frontier_coverage_1/centered_abs_mean": 0.13067235350608825,
"signal/frontier_coverage_1/group_std_mean": 0.1851820766925812,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018686146708205343,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018686146708205343,
"signal/frontier_coverage_10/centered_abs_mean": 0.13067235350608825,
"signal/frontier_coverage_10/group_std_mean": 0.1851820766925812,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018686146708205343,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018686146708205343,
"signal/frontier_coverage_15/centered_abs_mean": 0.13067235350608825,
"signal/frontier_coverage_15/group_std_mean": 0.1851820766925812,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018686146708205343,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018686146708205343,
"signal/frontier_coverage_20/centered_abs_mean": 0.13067235350608825,
"signal/frontier_coverage_20/group_std_mean": 0.1851820766925812,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018686146708205343,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018686146708205343,
"signal/frontier_coverage_25/centered_abs_mean": 0.13067235350608825,
"signal/frontier_coverage_25/group_std_mean": 0.1851820766925812,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018686146708205343,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018686146708205343,
"signal/frontier_coverage_5/centered_abs_mean": 0.13067235350608825,
"signal/frontier_coverage_5/group_std_mean": 0.1851820766925812,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018686146708205343,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018686146708205343,
"step": 55
},
{
"calibration/aurc": 0.322583325206245,
"calibration/batch_distribution_entropy": 0.8352429768114273,
"calibration/buffer_distribution_entropy": 0.6996665514037949,
"calibration/confidence_entropy": 0.4572730513075878,
"calibration/coverage@0%": 0.005208333333333334,
"calibration/coverage@1%": 0.005208333333333334,
"calibration/coverage@10%": 0.16354166666666667,
"calibration/coverage@15%": 0.29375,
"calibration/coverage@20%": 0.359375,
"calibration/coverage@25%": 0.3921875,
"calibration/coverage@30%": 0.45691489361702126,
"calibration/coverage@5%": 0.005208333333333334,
"calibration/ece": 0.1953935735919945,
"calibration/mean_confidence": 0.72532795217994,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009201388888888884,
"completions/max_length": 3781.6,
"completions/max_terminated_length": 3781.6,
"completions/mean_length": 738.7850830078125,
"completions/mean_terminated_length": 745.7328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 195.8,
"epoch": 0.14399820002249972,
"grad_norm": 0.0004830217803828418,
"learning_rate": 4.457831325301205e-06,
"loss": -0.0079,
"num_tokens": 117962974.0,
"reward": 0.9810348987579346,
"reward_std": 0.1425999477505684,
"rewards/accuracy_reward": 0.6373263835906983,
"rewards/brier_reward": 0.7495613336563111,
"rewards/confidence_uniqueness_reward": 0.9255987882614136,
"rewards/format_reward": 0.9904513955116272,
"rewards/frontier_coverage_0": -0.0036967315711081026,
"rewards/frontier_coverage_1": -0.0036967315711081026,
"rewards/frontier_coverage_10": -0.0036967315711081026,
"rewards/frontier_coverage_15": -0.0036967315711081026,
"rewards/frontier_coverage_20": -0.0036967315711081026,
"rewards/frontier_coverage_25": -0.0036967315711081026,
"rewards/frontier_coverage_5": -0.0036967315711081026,
"signal/accuracy_reward/centered_abs_mean": 0.18495008647441863,
"signal/accuracy_reward/group_std_mean": 0.238821542263031,
"signal/accuracy_reward/group_zero_std_frac": 0.3361111104488373,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09247504323720931,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09247504323720931,
"signal/advantage_abs_mean": 0.10767804533243179,
"signal/advantage_pre_scale_abs_mean": 0.10767804533243179,
"signal/advantage_pre_scale_std": 0.1729002594947815,
"signal/advantage_std": 0.1729002594947815,
"signal/brier_reward/centered_abs_mean": 0.16945191323757172,
"signal/brier_reward/group_std_mean": 0.21429203748703002,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016945191845297813,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016945191845297813,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04097889587283134,
"signal/confidence_uniqueness_reward/group_std_mean": 0.061232827603816986,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004097889456897974,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004097889456897974,
"signal/format_reward/centered_abs_mean": 0.01637369776144624,
"signal/format_reward/group_std_mean": 0.031473302841186525,
"signal/format_reward/group_zero_std_frac": 0.8666666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00818684888072312,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00818684888072312,
"signal/frontier_coverage_0/centered_abs_mean": 0.12938774824142457,
"signal/frontier_coverage_0/group_std_mean": 0.18174587488174437,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018502447521314025,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018502447521314025,
"signal/frontier_coverage_1/centered_abs_mean": 0.12938774824142457,
"signal/frontier_coverage_1/group_std_mean": 0.18174587488174437,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018502447521314025,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018502447521314025,
"signal/frontier_coverage_10/centered_abs_mean": 0.12938774824142457,
"signal/frontier_coverage_10/group_std_mean": 0.18174587488174437,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018502447521314025,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018502447521314025,
"signal/frontier_coverage_15/centered_abs_mean": 0.12938774824142457,
"signal/frontier_coverage_15/group_std_mean": 0.18174587488174437,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018502447521314025,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018502447521314025,
"signal/frontier_coverage_20/centered_abs_mean": 0.12938774824142457,
"signal/frontier_coverage_20/group_std_mean": 0.18174587488174437,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018502447521314025,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018502447521314025,
"signal/frontier_coverage_25/centered_abs_mean": 0.12938774824142457,
"signal/frontier_coverage_25/group_std_mean": 0.18174587488174437,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018502447521314025,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018502447521314025,
"signal/frontier_coverage_5/centered_abs_mean": 0.12938774824142457,
"signal/frontier_coverage_5/group_std_mean": 0.18174587488174437,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018502447521314025,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018502447521314025,
"step": 60
},
{
"calibration/aurc": 0.2320311524237611,
"calibration/batch_distribution_entropy": 0.8156248009958293,
"calibration/buffer_distribution_entropy": 0.7178334508271775,
"calibration/confidence_entropy": 0.44483765410335996,
"calibration/coverage@0%": 0.01632690638939771,
"calibration/coverage@1%": 0.01632690638939771,
"calibration/coverage@10%": 0.28226071766560934,
"calibration/coverage@15%": 0.4698960721302671,
"calibration/coverage@20%": 0.5423117811753071,
"calibration/coverage@25%": 0.5948163812308549,
"calibration/coverage@30%": 0.6710554163185741,
"calibration/coverage@5%": 0.04632690638939771,
"calibration/ece": 0.11255049437942628,
"calibration/mean_confidence": 0.7353874331667388,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008680555555555535,
"completions/max_length": 3591.8,
"completions/max_terminated_length": 3591.8,
"completions/mean_length": 724.928466796875,
"completions/mean_terminated_length": 731.31708984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 197.2,
"epoch": 0.1559980500243747,
"grad_norm": 0.0005269707180559635,
"learning_rate": 4.307228915662651e-06,
"loss": -0.0064,
"num_tokens": 129408198.0,
"reward": 1.003051507472992,
"reward_std": 0.12487713843584061,
"rewards/accuracy_reward": 0.6729166626930236,
"rewards/brier_reward": 0.7835439920425415,
"rewards/confidence_uniqueness_reward": 0.9210333466529846,
"rewards/format_reward": 0.9910590291023255,
"rewards/frontier_coverage_0": 0.006052964180707931,
"rewards/frontier_coverage_1": 0.006052964180707931,
"rewards/frontier_coverage_10": 0.006052964180707931,
"rewards/frontier_coverage_15": 0.006052964180707931,
"rewards/frontier_coverage_20": 0.006052964180707931,
"rewards/frontier_coverage_25": 0.006052964180707931,
"rewards/frontier_coverage_5": 0.006052964180707931,
"signal/accuracy_reward/centered_abs_mean": 0.15330946147441865,
"signal/accuracy_reward/group_std_mean": 0.20192310214042664,
"signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07665473073720933,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07665473073720933,
"signal/advantage_abs_mean": 0.09188215732574463,
"signal/advantage_pre_scale_abs_mean": 0.09188215732574463,
"signal/advantage_pre_scale_std": 0.16062064170837403,
"signal/advantage_std": 0.16062064170837403,
"signal/brier_reward/centered_abs_mean": 0.14177187383174897,
"signal/brier_reward/group_std_mean": 0.18185266852378845,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014177187345921993,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014177187345921993,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04180399999022484,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06167575493454933,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004180399980396032,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004180399980396032,
"signal/format_reward/centered_abs_mean": 0.015771484375,
"signal/format_reward/group_std_mean": 0.030386429652571677,
"signal/format_reward/group_zero_std_frac": 0.8750000238418579,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0078857421875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0078857421875,
"signal/frontier_coverage_0/centered_abs_mean": 0.09885098785161972,
"signal/frontier_coverage_0/group_std_mean": 0.13911318629980088,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001413569157011807,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001413569157011807,
"signal/frontier_coverage_1/centered_abs_mean": 0.09885098785161972,
"signal/frontier_coverage_1/group_std_mean": 0.13911318629980088,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001413569157011807,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001413569157011807,
"signal/frontier_coverage_10/centered_abs_mean": 0.09885098785161972,
"signal/frontier_coverage_10/group_std_mean": 0.13911318629980088,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001413569157011807,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001413569157011807,
"signal/frontier_coverage_15/centered_abs_mean": 0.09885098785161972,
"signal/frontier_coverage_15/group_std_mean": 0.13911318629980088,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001413569157011807,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001413569157011807,
"signal/frontier_coverage_20/centered_abs_mean": 0.09885098785161972,
"signal/frontier_coverage_20/group_std_mean": 0.13911318629980088,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001413569157011807,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001413569157011807,
"signal/frontier_coverage_25/centered_abs_mean": 0.09885098785161972,
"signal/frontier_coverage_25/group_std_mean": 0.13911318629980088,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001413569157011807,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001413569157011807,
"signal/frontier_coverage_5/centered_abs_mean": 0.09885098785161972,
"signal/frontier_coverage_5/group_std_mean": 0.13911318629980088,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001413569157011807,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001413569157011807,
"step": 65
},
{
"calibration/aurc": 0.30146087691874146,
"calibration/batch_distribution_entropy": 0.8401200097364473,
"calibration/buffer_distribution_entropy": 0.7292469367397179,
"calibration/confidence_entropy": 0.4543341177595561,
"calibration/coverage@0%": 0.006362346830231948,
"calibration/coverage@1%": 0.006362346830231948,
"calibration/coverage@10%": 0.07782901349689861,
"calibration/coverage@15%": 0.1092956801635653,
"calibration/coverage@20%": 0.16031743821404396,
"calibration/coverage@25%": 0.3165103015734869,
"calibration/coverage@30%": 0.3856639134098664,
"calibration/coverage@5%": 0.058095680163565276,
"calibration/ece": 0.1530902409412818,
"calibration/mean_confidence": 0.723583873833328,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010243055555555557,
"completions/max_length": 3282.6,
"completions/max_terminated_length": 3282.6,
"completions/mean_length": 718.2440185546875,
"completions/mean_terminated_length": 725.6932373046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 167.4,
"epoch": 0.16799790002624967,
"grad_norm": 0.0005004884442314506,
"learning_rate": 4.156626506024097e-06,
"loss": -0.0088,
"num_tokens": 140760513.0,
"reward": 0.9935579299926758,
"reward_std": 0.12153150737285615,
"rewards/accuracy_reward": 0.6536458373069763,
"rewards/brier_reward": 0.7778854846954346,
"rewards/confidence_uniqueness_reward": 0.9321373701095581,
"rewards/format_reward": 0.9896701335906982,
"rewards/frontier_coverage_0": 0.008967609610408545,
"rewards/frontier_coverage_1": 0.008967609610408545,
"rewards/frontier_coverage_10": 0.008967609610408545,
"rewards/frontier_coverage_15": 0.008967609610408545,
"rewards/frontier_coverage_20": 0.008967609610408545,
"rewards/frontier_coverage_25": 0.008967609610408545,
"rewards/frontier_coverage_5": 0.008967609610408545,
"signal/accuracy_reward/centered_abs_mean": 0.15254991352558137,
"signal/accuracy_reward/group_std_mean": 0.20258214175701142,
"signal/accuracy_reward/group_zero_std_frac": 0.4194444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07627495676279068,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07627495676279068,
"signal/advantage_abs_mean": 0.08828288316726685,
"signal/advantage_pre_scale_abs_mean": 0.08828288316726685,
"signal/advantage_pre_scale_std": 0.15849795639514924,
"signal/advantage_std": 0.15849795639514924,
"signal/brier_reward/centered_abs_mean": 0.13586993813514708,
"signal/brier_reward/group_std_mean": 0.17457389533519746,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013586993515491485,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013586993515491485,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03803398087620735,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05799528583884239,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038033980876207353,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038033980876207353,
"signal/format_reward/centered_abs_mean": 0.018071831949055196,
"signal/format_reward/group_std_mean": 0.03415331579744816,
"signal/format_reward/group_zero_std_frac": 0.8611111044883728,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009035915974527598,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009035915974527598,
"signal/frontier_coverage_0/centered_abs_mean": 0.11883060038089752,
"signal/frontier_coverage_0/group_std_mean": 0.16314986646175383,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016992775024846196,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016992775024846196,
"signal/frontier_coverage_1/centered_abs_mean": 0.11883060038089752,
"signal/frontier_coverage_1/group_std_mean": 0.16314986646175383,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016992775024846196,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016992775024846196,
"signal/frontier_coverage_10/centered_abs_mean": 0.11883060038089752,
"signal/frontier_coverage_10/group_std_mean": 0.16314986646175383,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016992775024846196,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016992775024846196,
"signal/frontier_coverage_15/centered_abs_mean": 0.11883060038089752,
"signal/frontier_coverage_15/group_std_mean": 0.16314986646175383,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016992775024846196,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016992775024846196,
"signal/frontier_coverage_20/centered_abs_mean": 0.11883060038089752,
"signal/frontier_coverage_20/group_std_mean": 0.16314986646175383,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016992775024846196,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016992775024846196,
"signal/frontier_coverage_25/centered_abs_mean": 0.11883060038089752,
"signal/frontier_coverage_25/group_std_mean": 0.16314986646175383,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016992775024846196,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016992775024846196,
"signal/frontier_coverage_5/centered_abs_mean": 0.11883060038089752,
"signal/frontier_coverage_5/group_std_mean": 0.16314986646175383,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016992775024846196,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016992775024846196,
"step": 70
},
{
"calibration/aurc": 0.2161523716364652,
"calibration/batch_distribution_entropy": 0.8359568738154429,
"calibration/buffer_distribution_entropy": 0.7416488545921555,
"calibration/confidence_entropy": 0.4851037077204311,
"calibration/coverage@0%": 0.001058201058201058,
"calibration/coverage@1%": 0.001058201058201058,
"calibration/coverage@10%": 0.19976453641375108,
"calibration/coverage@15%": 0.33732977608298814,
"calibration/coverage@20%": 0.5214813130814425,
"calibration/coverage@25%": 0.6504089077351132,
"calibration/coverage@30%": 0.7415668559271692,
"calibration/coverage@5%": 0.08535139477547854,
"calibration/ece": 0.1188181935086359,
"calibration/mean_confidence": 0.7158355008307618,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004513888888888884,
"completions/max_length": 3429.2,
"completions/max_terminated_length": 3429.2,
"completions/mean_length": 726.0884521484375,
"completions/mean_terminated_length": 729.3609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 211.8,
"epoch": 0.17999775002812465,
"grad_norm": 0.00046963320346549153,
"learning_rate": 4.006024096385543e-06,
"loss": -0.0016,
"num_tokens": 152189948.0,
"reward": 1.0248547196388245,
"reward_std": 0.11748676300048828,
"rewards/accuracy_reward": 0.7057291746139527,
"rewards/brier_reward": 0.8091526508331299,
"rewards/confidence_uniqueness_reward": 0.9357035756111145,
"rewards/format_reward": 0.9953992962837219,
"rewards/frontier_coverage_0": -0.0019492823630571365,
"rewards/frontier_coverage_1": -0.0019492823630571365,
"rewards/frontier_coverage_10": -0.0019492823630571365,
"rewards/frontier_coverage_15": -0.0019492823630571365,
"rewards/frontier_coverage_20": -0.0019492823630571365,
"rewards/frontier_coverage_25": -0.0019492823630571365,
"rewards/frontier_coverage_5": -0.0019492823630571365,
"signal/accuracy_reward/centered_abs_mean": 0.1632920980453491,
"signal/accuracy_reward/group_std_mean": 0.20991926789283752,
"signal/accuracy_reward/group_zero_std_frac": 0.42777777910232545,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08164604902267455,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08164604902267455,
"signal/advantage_abs_mean": 0.08842966109514236,
"signal/advantage_pre_scale_abs_mean": 0.08842966109514236,
"signal/advantage_pre_scale_std": 0.15177057385444642,
"signal/advantage_std": 0.15177057385444642,
"signal/brier_reward/centered_abs_mean": 0.12017300575971604,
"signal/brier_reward/group_std_mean": 0.15719916820526122,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012017300724983216,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012017300724983216,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03090880624949932,
"signal/confidence_uniqueness_reward/group_std_mean": 0.046534180641174316,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030908805783838034,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030908805783838034,
"signal/format_reward/centered_abs_mean": 0.008512369729578495,
"signal/format_reward/group_std_mean": 0.01946439780294895,
"signal/format_reward/group_zero_std_frac": 0.9083333492279053,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004256184864789248,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004256184864789248,
"signal/frontier_coverage_0/centered_abs_mean": 0.10867546051740647,
"signal/frontier_coverage_0/group_std_mean": 0.14882293045520784,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015540590975433588,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015540590975433588,
"signal/frontier_coverage_1/centered_abs_mean": 0.10867546051740647,
"signal/frontier_coverage_1/group_std_mean": 0.14882293045520784,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015540590975433588,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015540590975433588,
"signal/frontier_coverage_10/centered_abs_mean": 0.10867546051740647,
"signal/frontier_coverage_10/group_std_mean": 0.14882293045520784,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015540590975433588,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015540590975433588,
"signal/frontier_coverage_15/centered_abs_mean": 0.10867546051740647,
"signal/frontier_coverage_15/group_std_mean": 0.14882293045520784,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015540590975433588,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015540590975433588,
"signal/frontier_coverage_20/centered_abs_mean": 0.10867546051740647,
"signal/frontier_coverage_20/group_std_mean": 0.14882293045520784,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015540590975433588,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015540590975433588,
"signal/frontier_coverage_25/centered_abs_mean": 0.10867546051740647,
"signal/frontier_coverage_25/group_std_mean": 0.14882293045520784,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015540590975433588,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015540590975433588,
"signal/frontier_coverage_5/centered_abs_mean": 0.10867546051740647,
"signal/frontier_coverage_5/group_std_mean": 0.14882293045520784,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015540590975433588,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015540590975433588,
"step": 75
},
{
"calibration/aurc": 0.18651022352019814,
"calibration/batch_distribution_entropy": 0.7930350849715844,
"calibration/buffer_distribution_entropy": 0.7505434491465455,
"calibration/confidence_entropy": 0.43033643310086267,
"calibration/coverage@0%": 0.020376741516687377,
"calibration/coverage@1%": 0.020376741516687377,
"calibration/coverage@10%": 0.28033401661756563,
"calibration/coverage@15%": 0.3723768532157701,
"calibration/coverage@20%": 0.6270485291790584,
"calibration/coverage@25%": 0.6903603691654683,
"calibration/coverage@30%": 0.8499867357339328,
"calibration/coverage@5%": 0.18643418276995108,
"calibration/ece": 0.15777915022958544,
"calibration/mean_confidence": 0.7495933013222421,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006770833333333304,
"completions/max_length": 3568.8,
"completions/max_terminated_length": 3568.8,
"completions/mean_length": 770.2166625976563,
"completions/mean_terminated_length": 775.5416259765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 199.0,
"epoch": 0.19199760002999963,
"grad_norm": 0.0005624489858746529,
"learning_rate": 3.855421686746989e-06,
"loss": -0.0061,
"num_tokens": 164116124.0,
"reward": 1.008245551586151,
"reward_std": 0.11997615545988083,
"rewards/accuracy_reward": 0.6761284828186035,
"rewards/brier_reward": 0.7941550731658935,
"rewards/confidence_uniqueness_reward": 0.9332335710525512,
"rewards/format_reward": 0.9932291626930236,
"rewards/frontier_coverage_0": 0.008270517364144326,
"rewards/frontier_coverage_1": 0.008270517364144326,
"rewards/frontier_coverage_10": 0.008270517364144326,
"rewards/frontier_coverage_15": 0.008270517364144326,
"rewards/frontier_coverage_20": 0.008270517364144326,
"rewards/frontier_coverage_25": 0.008270517364144326,
"rewards/frontier_coverage_5": 0.008270517364144326,
"signal/accuracy_reward/centered_abs_mean": 0.15729708969593048,
"signal/accuracy_reward/group_std_mean": 0.21044376790523528,
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07864854484796524,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07864854484796524,
"signal/advantage_abs_mean": 0.08815103322267533,
"signal/advantage_pre_scale_abs_mean": 0.08815103322267533,
"signal/advantage_pre_scale_std": 0.15297050178050994,
"signal/advantage_std": 0.15297050178050994,
"signal/brier_reward/centered_abs_mean": 0.1248743325471878,
"signal/brier_reward/group_std_mean": 0.16186331510543822,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012487433291971684,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012487433291971684,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.034130534902215,
"signal/confidence_uniqueness_reward/group_std_mean": 0.049203697592020035,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034130535554140807,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034130535554140807,
"signal/format_reward/centered_abs_mean": 0.011599392537027598,
"signal/format_reward/group_std_mean": 0.021724069118499757,
"signal/format_reward/group_zero_std_frac": 0.9111111283302307,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005799696268513799,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005799696268513799,
"signal/frontier_coverage_0/centered_abs_mean": 0.10307898521423339,
"signal/frontier_coverage_0/group_std_mean": 0.143079274892807,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014740294544026255,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014740294544026255,
"signal/frontier_coverage_1/centered_abs_mean": 0.10307898521423339,
"signal/frontier_coverage_1/group_std_mean": 0.143079274892807,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014740294544026255,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014740294544026255,
"signal/frontier_coverage_10/centered_abs_mean": 0.10307898521423339,
"signal/frontier_coverage_10/group_std_mean": 0.143079274892807,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014740294544026255,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014740294544026255,
"signal/frontier_coverage_15/centered_abs_mean": 0.10307898521423339,
"signal/frontier_coverage_15/group_std_mean": 0.143079274892807,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014740294544026255,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014740294544026255,
"signal/frontier_coverage_20/centered_abs_mean": 0.10307898521423339,
"signal/frontier_coverage_20/group_std_mean": 0.143079274892807,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014740294544026255,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014740294544026255,
"signal/frontier_coverage_25/centered_abs_mean": 0.10307898521423339,
"signal/frontier_coverage_25/group_std_mean": 0.143079274892807,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014740294544026255,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014740294544026255,
"signal/frontier_coverage_5/centered_abs_mean": 0.10307898521423339,
"signal/frontier_coverage_5/group_std_mean": 0.143079274892807,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014740294544026255,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014740294544026255,
"step": 80
},
{
"calibration/aurc": 0.1885673146932413,
"calibration/batch_distribution_entropy": 0.7733087879174113,
"calibration/buffer_distribution_entropy": 0.7553238636343612,
"calibration/confidence_entropy": 0.40886239649926487,
"calibration/coverage@0%": 0.009919262309161616,
"calibration/coverage@1%": 0.009919262309161616,
"calibration/coverage@10%": 0.15248443679487708,
"calibration/coverage@15%": 0.37427037556169357,
"calibration/coverage@20%": 0.5941326314740762,
"calibration/coverage@25%": 0.7604391622349562,
"calibration/coverage@30%": 0.871613869192398,
"calibration/coverage@5%": 0.027166753583158128,
"calibration/ece": 0.12875506861667763,
"calibration/mean_confidence": 0.7515428029706879,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003732638888888906,
"completions/max_length": 3308.4,
"completions/max_terminated_length": 3308.4,
"completions/mean_length": 760.9641723632812,
"completions/mean_terminated_length": 763.8289794921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 234.6,
"epoch": 0.2039974500318746,
"grad_norm": 0.0005237568984739482,
"learning_rate": 3.7048192771084342e-06,
"loss": -0.0019,
"num_tokens": 175969631.0,
"reward": 1.0170960187911988,
"reward_std": 0.1100090652704239,
"rewards/accuracy_reward": 0.6894097208976746,
"rewards/brier_reward": 0.8078195929527283,
"rewards/confidence_uniqueness_reward": 0.919755506515503,
"rewards/format_reward": 0.9962673664093018,
"rewards/frontier_coverage_0": 0.014984596229624003,
"rewards/frontier_coverage_1": 0.014984596229624003,
"rewards/frontier_coverage_10": 0.014984596229624003,
"rewards/frontier_coverage_15": 0.014984596229624003,
"rewards/frontier_coverage_20": 0.014984596229624003,
"rewards/frontier_coverage_25": 0.014984596229624003,
"rewards/frontier_coverage_5": 0.014984596229624003,
"signal/accuracy_reward/centered_abs_mean": 0.15218099057674409,
"signal/accuracy_reward/group_std_mean": 0.19950250685214996,
"signal/accuracy_reward/group_zero_std_frac": 0.43055556416511537,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07609049528837204,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07609049528837204,
"signal/advantage_abs_mean": 0.08195096254348755,
"signal/advantage_pre_scale_abs_mean": 0.08195096254348755,
"signal/advantage_pre_scale_std": 0.1454104334115982,
"signal/advantage_std": 0.1454104334115982,
"signal/brier_reward/centered_abs_mean": 0.11562369614839554,
"signal/brier_reward/group_std_mean": 0.15216899812221527,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011562369205057621,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011562369205057621,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04034182578325272,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0564569778740406,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004034182662144304,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004034182662144304,
"signal/format_reward/centered_abs_mean": 0.006960720429196954,
"signal/format_reward/group_std_mean": 0.016455814242362976,
"signal/format_reward/group_zero_std_frac": 0.919444465637207,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003480360214598477,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.003480360214598477,
"signal/frontier_coverage_0/centered_abs_mean": 0.09572111815214157,
"signal/frontier_coverage_0/group_std_mean": 0.13204465210437774,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013688119826838375,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013688119826838375,
"signal/frontier_coverage_1/centered_abs_mean": 0.09572111815214157,
"signal/frontier_coverage_1/group_std_mean": 0.13204465210437774,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013688119826838375,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013688119826838375,
"signal/frontier_coverage_10/centered_abs_mean": 0.09572111815214157,
"signal/frontier_coverage_10/group_std_mean": 0.13204465210437774,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013688119826838375,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013688119826838375,
"signal/frontier_coverage_15/centered_abs_mean": 0.09572111815214157,
"signal/frontier_coverage_15/group_std_mean": 0.13204465210437774,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013688119826838375,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013688119826838375,
"signal/frontier_coverage_20/centered_abs_mean": 0.09572111815214157,
"signal/frontier_coverage_20/group_std_mean": 0.13204465210437774,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013688119826838375,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013688119826838375,
"signal/frontier_coverage_25/centered_abs_mean": 0.09572111815214157,
"signal/frontier_coverage_25/group_std_mean": 0.13204465210437774,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013688119826838375,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013688119826838375,
"signal/frontier_coverage_5/centered_abs_mean": 0.09572111815214157,
"signal/frontier_coverage_5/group_std_mean": 0.13204465210437774,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013688119826838375,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013688119826838375,
"step": 85
},
{
"calibration/aurc": 0.13547219535403135,
"calibration/batch_distribution_entropy": 0.8332040774364462,
"calibration/buffer_distribution_entropy": 0.7601464935483995,
"calibration/confidence_entropy": 0.4530539299800417,
"calibration/coverage@0%": 0.07140748031496062,
"calibration/coverage@1%": 0.09484498031496062,
"calibration/coverage@10%": 0.49112532808398945,
"calibration/coverage@15%": 0.6052944553805775,
"calibration/coverage@20%": 0.7221538713910761,
"calibration/coverage@25%": 0.8530511811023622,
"calibration/coverage@30%": 0.9115731627296588,
"calibration/coverage@5%": 0.2908136482939633,
"calibration/ece": 0.09694233170413263,
"calibration/mean_confidence": 0.6963118038044955,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003472222222222232,
"completions/max_length": 3379.0,
"completions/max_terminated_length": 3379.0,
"completions/mean_length": 740.676220703125,
"completions/mean_terminated_length": 743.2724243164063,
"completions/min_length": 0.0,
"completions/min_terminated_length": 210.8,
"epoch": 0.2159973000337496,
"grad_norm": 0.0005956885870546103,
"learning_rate": 3.5542168674698798e-06,
"loss": -0.0022,
"num_tokens": 187570893.0,
"reward": 1.0165443658828734,
"reward_std": 0.11067529022693634,
"rewards/accuracy_reward": 0.6825520873069764,
"rewards/brier_reward": 0.8147860646247864,
"rewards/confidence_uniqueness_reward": 0.935043203830719,
"rewards/format_reward": 0.9965277791023255,
"rewards/frontier_coverage_0": 0.020194912049919366,
"rewards/frontier_coverage_1": 0.020194912049919366,
"rewards/frontier_coverage_10": 0.020194912049919366,
"rewards/frontier_coverage_15": 0.020194912049919366,
"rewards/frontier_coverage_20": 0.020194912049919366,
"rewards/frontier_coverage_25": 0.020194912049919366,
"rewards/frontier_coverage_5": 0.020194912049919366,
"signal/accuracy_reward/centered_abs_mean": 0.1577311247587204,
"signal/accuracy_reward/group_std_mean": 0.20566980242729188,
"signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0788655623793602,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0788655623793602,
"signal/advantage_abs_mean": 0.08244062066078187,
"signal/advantage_pre_scale_abs_mean": 0.08244062066078187,
"signal/advantage_pre_scale_std": 0.14315189719200133,
"signal/advantage_std": 0.14315189719200133,
"signal/brier_reward/centered_abs_mean": 0.11458506137132644,
"signal/brier_reward/group_std_mean": 0.15111578106880189,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011458505876362324,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011458505876362324,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03055480159819126,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04391605779528618,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003055480308830738,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003055480308830738,
"signal/format_reward/centered_abs_mean": 0.00646701380610466,
"signal/format_reward/group_std_mean": 0.014765101112425327,
"signal/format_reward/group_zero_std_frac": 0.9305555701255799,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00323350690305233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00323350690305233,
"signal/frontier_coverage_0/centered_abs_mean": 0.11731700897216797,
"signal/frontier_coverage_0/group_std_mean": 0.1610626608133316,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016776332166045904,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016776332166045904,
"signal/frontier_coverage_1/centered_abs_mean": 0.11731700897216797,
"signal/frontier_coverage_1/group_std_mean": 0.1610626608133316,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016776332166045904,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016776332166045904,
"signal/frontier_coverage_10/centered_abs_mean": 0.11731700897216797,
"signal/frontier_coverage_10/group_std_mean": 0.1610626608133316,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016776332166045904,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016776332166045904,
"signal/frontier_coverage_15/centered_abs_mean": 0.11731700897216797,
"signal/frontier_coverage_15/group_std_mean": 0.1610626608133316,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016776332166045904,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016776332166045904,
"signal/frontier_coverage_20/centered_abs_mean": 0.11731700897216797,
"signal/frontier_coverage_20/group_std_mean": 0.1610626608133316,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016776332166045904,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016776332166045904,
"signal/frontier_coverage_25/centered_abs_mean": 0.11731700897216797,
"signal/frontier_coverage_25/group_std_mean": 0.1610626608133316,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016776332166045904,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016776332166045904,
"signal/frontier_coverage_5/centered_abs_mean": 0.11731700897216797,
"signal/frontier_coverage_5/group_std_mean": 0.1610626608133316,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016776332166045904,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016776332166045904,
"step": 90
},
{
"calibration/aurc": 0.19124679587417506,
"calibration/batch_distribution_entropy": 0.8975058875243824,
"calibration/buffer_distribution_entropy": 0.7743957879617259,
"calibration/confidence_entropy": 0.48854993876895064,
"calibration/coverage@0%": 0.04803693795089366,
"calibration/coverage@1%": 0.08241193795089366,
"calibration/coverage@10%": 0.38390832849323486,
"calibration/coverage@15%": 0.5201580258706102,
"calibration/coverage@20%": 0.6464850084397695,
"calibration/coverage@25%": 0.6967907865926141,
"calibration/coverage@30%": 0.7429901698420995,
"calibration/coverage@5%": 0.16712853853938087,
"calibration/ece": 0.1174216128367086,
"calibration/mean_confidence": 0.624049445549762,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003819444444444442,
"completions/max_length": 3475.2,
"completions/max_terminated_length": 3475.2,
"completions/mean_length": 731.6876953125,
"completions/mean_terminated_length": 734.4987670898438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 182.4,
"epoch": 0.22799715003562457,
"grad_norm": 0.0005185811896808445,
"learning_rate": 3.4036144578313257e-06,
"loss": -0.0028,
"num_tokens": 199091615.0,
"reward": 1.0106172680854797,
"reward_std": 0.10462814420461655,
"rewards/accuracy_reward": 0.6690972208976745,
"rewards/brier_reward": 0.814009141921997,
"rewards/confidence_uniqueness_reward": 0.9448173999786377,
"rewards/format_reward": 0.9961805462837219,
"rewards/frontier_coverage_0": 0.020936440164223313,
"rewards/frontier_coverage_1": 0.020936440164223313,
"rewards/frontier_coverage_10": 0.020936440164223313,
"rewards/frontier_coverage_15": 0.020936440164223313,
"rewards/frontier_coverage_20": 0.020936440164223313,
"rewards/frontier_coverage_25": 0.020936440164223313,
"rewards/frontier_coverage_5": 0.020936440164223313,
"signal/accuracy_reward/centered_abs_mean": 0.14444444328546524,
"signal/accuracy_reward/group_std_mean": 0.19350114166736604,
"signal/accuracy_reward/group_zero_std_frac": 0.44166667461395265,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07222222164273262,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07222222164273262,
"signal/advantage_abs_mean": 0.0762260913848877,
"signal/advantage_pre_scale_abs_mean": 0.0762260913848877,
"signal/advantage_pre_scale_std": 0.13657819628715515,
"signal/advantage_std": 0.13657819628715515,
"signal/brier_reward/centered_abs_mean": 0.11326702684164047,
"signal/brier_reward/group_std_mean": 0.14780981540679933,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011326702870428562,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011326702870428562,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027253608033061028,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04070703834295273,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027253609616309403,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027253609616309403,
"signal/format_reward/centered_abs_mean": 0.00711805559694767,
"signal/format_reward/group_std_mean": 0.016377711296081544,
"signal/format_reward/group_zero_std_frac": 0.9222222447395325,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003559027798473835,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.003559027798473835,
"signal/frontier_coverage_0/centered_abs_mean": 0.13173725605010986,
"signal/frontier_coverage_0/group_std_mean": 0.1795190155506134,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001883842796087265,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001883842796087265,
"signal/frontier_coverage_1/centered_abs_mean": 0.13173725605010986,
"signal/frontier_coverage_1/group_std_mean": 0.1795190155506134,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001883842796087265,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001883842796087265,
"signal/frontier_coverage_10/centered_abs_mean": 0.13173725605010986,
"signal/frontier_coverage_10/group_std_mean": 0.1795190155506134,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001883842796087265,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001883842796087265,
"signal/frontier_coverage_15/centered_abs_mean": 0.13173725605010986,
"signal/frontier_coverage_15/group_std_mean": 0.1795190155506134,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001883842796087265,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001883842796087265,
"signal/frontier_coverage_20/centered_abs_mean": 0.13173725605010986,
"signal/frontier_coverage_20/group_std_mean": 0.1795190155506134,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001883842796087265,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001883842796087265,
"signal/frontier_coverage_25/centered_abs_mean": 0.13173725605010986,
"signal/frontier_coverage_25/group_std_mean": 0.1795190155506134,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001883842796087265,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001883842796087265,
"signal/frontier_coverage_5/centered_abs_mean": 0.13173725605010986,
"signal/frontier_coverage_5/group_std_mean": 0.1795190155506134,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001883842796087265,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001883842796087265,
"step": 95
},
{
"calibration/aurc": 0.18834491364983547,
"calibration/batch_distribution_entropy": 0.8389232639652772,
"calibration/buffer_distribution_entropy": 0.7841595452485729,
"calibration/confidence_entropy": 0.4508094524363834,
"calibration/coverage@0%": 0.012541010611400316,
"calibration/coverage@1%": 0.012541010611400316,
"calibration/coverage@10%": 0.23095183433959976,
"calibration/coverage@15%": 0.41025999431848914,
"calibration/coverage@20%": 0.6366873761277325,
"calibration/coverage@25%": 0.8210446158298123,
"calibration/coverage@30%": 0.8806853070175439,
"calibration/coverage@5%": 0.08330307348017359,
"calibration/ece": 0.12463181735349024,
"calibration/mean_confidence": 0.7062022295145554,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004861111111111116,
"completions/max_length": 3220.2,
"completions/max_terminated_length": 3220.2,
"completions/mean_length": 718.14775390625,
"completions/mean_terminated_length": 721.6147583007812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 209.4,
"epoch": 0.23999700003749952,
"grad_norm": 0.000564222689718008,
"learning_rate": 3.2530120481927713e-06,
"loss": -0.003,
"num_tokens": 210463749.0,
"reward": 1.0169752597808839,
"reward_std": 0.1152550533413887,
"rewards/accuracy_reward": 0.678819453716278,
"rewards/brier_reward": 0.8254865527153015,
"rewards/confidence_uniqueness_reward": 0.9365423917770386,
"rewards/format_reward": 0.9946180582046509,
"rewards/frontier_coverage_0": 0.040495523065328595,
"rewards/frontier_coverage_1": 0.040495523065328595,
"rewards/frontier_coverage_10": 0.040495523065328595,
"rewards/frontier_coverage_15": 0.040495523065328595,
"rewards/frontier_coverage_20": 0.040495523065328595,
"rewards/frontier_coverage_25": 0.040495523065328595,
"rewards/frontier_coverage_5": 0.040495523065328595,
"signal/accuracy_reward/centered_abs_mean": 0.15894097089767456,
"signal/accuracy_reward/group_std_mean": 0.2088989406824112,
"signal/accuracy_reward/group_zero_std_frac": 0.4111111104488373,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07947048544883728,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07947048544883728,
"signal/advantage_abs_mean": 0.08560490906238556,
"signal/advantage_pre_scale_abs_mean": 0.08560490906238556,
"signal/advantage_pre_scale_std": 0.14947248101234437,
"signal/advantage_std": 0.14947248101234437,
"signal/brier_reward/centered_abs_mean": 0.12055338621139526,
"signal/brier_reward/group_std_mean": 0.15626082718372344,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012055338732898236,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012055338732898236,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03030674159526825,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04416131526231766,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030306743923574687,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030306743923574687,
"signal/format_reward/centered_abs_mean": 0.009190538339316845,
"signal/format_reward/group_std_mean": 0.018485058657824995,
"signal/format_reward/group_zero_std_frac": 0.919444453716278,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004595269169658423,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004595269169658423,
"signal/frontier_coverage_0/centered_abs_mean": 0.11489113122224807,
"signal/frontier_coverage_0/group_std_mean": 0.15738584399223327,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016429432900622488,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016429432900622488,
"signal/frontier_coverage_1/centered_abs_mean": 0.11489113122224807,
"signal/frontier_coverage_1/group_std_mean": 0.15738584399223327,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016429432900622488,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016429432900622488,
"signal/frontier_coverage_10/centered_abs_mean": 0.11489113122224807,
"signal/frontier_coverage_10/group_std_mean": 0.15738584399223327,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016429432900622488,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016429432900622488,
"signal/frontier_coverage_15/centered_abs_mean": 0.11489113122224807,
"signal/frontier_coverage_15/group_std_mean": 0.15738584399223327,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016429432900622488,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016429432900622488,
"signal/frontier_coverage_20/centered_abs_mean": 0.11489113122224807,
"signal/frontier_coverage_20/group_std_mean": 0.15738584399223327,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016429432900622488,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016429432900622488,
"signal/frontier_coverage_25/centered_abs_mean": 0.11489113122224807,
"signal/frontier_coverage_25/group_std_mean": 0.15738584399223327,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016429432900622488,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016429432900622488,
"signal/frontier_coverage_5/centered_abs_mean": 0.11489113122224807,
"signal/frontier_coverage_5/group_std_mean": 0.15738584399223327,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016429432900622488,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016429432900622488,
"step": 100
},
{
"epoch": 0.23999700003749952,
"eval_calibration/aurc": 0.14868938105166316,
"eval_calibration/batch_distribution_entropy": 0.7457575510587467,
"eval_calibration/buffer_distribution_entropy": 0.7879834341654887,
"eval_calibration/confidence_entropy": 0.40222145732977205,
"eval_calibration/coverage@0%": 0.2552083333333333,
"eval_calibration/coverage@1%": 0.2552083333333333,
"eval_calibration/coverage@10%": 0.4583333333333333,
"eval_calibration/coverage@15%": 0.5260416666666666,
"eval_calibration/coverage@20%": 0.7604166666666666,
"eval_calibration/coverage@25%": 0.8854166666666666,
"eval_calibration/coverage@30%": 0.9583333333333334,
"eval_calibration/coverage@5%": 0.2916666666666667,
"eval_calibration/ece": 0.17504294376882312,
"eval_calibration/mean_confidence": 0.754312944601324,
"eval_completions/clipped_ratio": 0.004340277777777772,
"eval_completions/max_length": 2818.6666666666665,
"eval_completions/max_terminated_length": 2818.6666666666665,
"eval_completions/mean_length": 706.1878763834635,
"eval_completions/mean_terminated_length": 709.2273864746094,
"eval_completions/min_length": 131.5,
"eval_completions/min_terminated_length": 234.5,
"eval_loss": 0.0,
"eval_num_tokens": 210463749.0,
"eval_reward": 1.0165491104125977,
"eval_reward_std": 0.24284635484218597,
"eval_rewards/accuracy_reward": 0.6935763855775198,
"eval_rewards/brier_reward": 0.8220989306767782,
"eval_rewards/confidence_uniqueness_reward": 0.8749090135097504,
"eval_rewards/format_reward": 0.9939236044883728,
"eval_rewards/frontier_coverage_0": 0.03095191267008583,
"eval_rewards/frontier_coverage_1": 0.03095191267008583,
"eval_rewards/frontier_coverage_10": 0.03095191267008583,
"eval_rewards/frontier_coverage_15": 0.03095191267008583,
"eval_rewards/frontier_coverage_20": 0.03095191267008583,
"eval_rewards/frontier_coverage_25": 0.03095191267008583,
"eval_rewards/frontier_coverage_5": 0.03095191267008583,
"eval_runtime": 185.5248,
"eval_samples_per_second": 5.39,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4078233540058136,
"eval_signal/accuracy_reward/group_std_mean": 0.4573976546525955,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2039116770029068,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2039116770029068,
"eval_signal/advantage_abs_mean": 0.2098855177561442,
"eval_signal/advantage_pre_scale_abs_mean": 0.2098855177561442,
"eval_signal/advantage_pre_scale_std": 0.24122367799282074,
"eval_signal/advantage_std": 0.24122367799282074,
"eval_signal/brier_reward/centered_abs_mean": 0.20792252322038016,
"eval_signal/brier_reward/group_std_mean": 0.26402051995197934,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02079225331544876,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02079225331544876,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05713109113276005,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08232143521308899,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005713109392672777,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005713109392672777,
"eval_signal/format_reward/centered_abs_mean": 0.011773003110041222,
"eval_signal/format_reward/group_std_mean": 0.034373246443768345,
"eval_signal/format_reward/group_zero_std_frac": 0.8055555721124014,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.005886501555020611,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.005886501555020611,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.13855222860972086,
"eval_signal/frontier_coverage_0/group_std_mean": 0.23446151365836462,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019812969064029553,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019812969064029553,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.13855222860972086,
"eval_signal/frontier_coverage_1/group_std_mean": 0.23446151365836462,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019812969064029553,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019812969064029553,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.13855222860972086,
"eval_signal/frontier_coverage_10/group_std_mean": 0.23446151365836462,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019812969064029553,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019812969064029553,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.13855222860972086,
"eval_signal/frontier_coverage_15/group_std_mean": 0.23446151365836462,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019812969064029553,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019812969064029553,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.13855222860972086,
"eval_signal/frontier_coverage_20/group_std_mean": 0.23446151365836462,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019812969064029553,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019812969064029553,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.13855222860972086,
"eval_signal/frontier_coverage_25/group_std_mean": 0.23446151365836462,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019812969064029553,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019812969064029553,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.13855222860972086,
"eval_signal/frontier_coverage_5/group_std_mean": 0.23446151365836462,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019812969064029553,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019812969064029553,
"eval_steps_per_second": 0.032,
"step": 100
},
{
"calibration/aurc": 0.2962014591980389,
"calibration/batch_distribution_entropy": 0.7614938360584489,
"calibration/buffer_distribution_entropy": 0.7884020557358128,
"calibration/confidence_entropy": 0.4082213590920844,
"calibration/coverage@0%": 0.006255439512619669,
"calibration/coverage@1%": 0.006255439512619669,
"calibration/coverage@10%": 0.12406077748767044,
"calibration/coverage@15%": 0.14494850594720046,
"calibration/coverage@20%": 0.34461551711633304,
"calibration/coverage@25%": 0.4896351900203076,
"calibration/coverage@30%": 0.5572739882506528,
"calibration/coverage@5%": 0.07414055700609226,
"calibration/ece": 0.18316298996542563,
"calibration/mean_confidence": 0.7656324318669362,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004947916666666674,
"completions/max_length": 3749.6,
"completions/max_terminated_length": 3749.6,
"completions/mean_length": 706.183251953125,
"completions/mean_terminated_length": 709.6047241210938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 195.8,
"epoch": 0.2519968500393745,
"grad_norm": 0.0007198955281637609,
"learning_rate": 3.1024096385542172e-06,
"loss": -0.0035,
"num_tokens": 221675844.0,
"reward": 1.012697958946228,
"reward_std": 0.11043240427970887,
"rewards/accuracy_reward": 0.6752604126930237,
"rewards/brier_reward": 0.8130281567573547,
"rewards/confidence_uniqueness_reward": 0.9284329533576965,
"rewards/format_reward": 0.9948784708976746,
"rewards/frontier_coverage_0": 0.03478905353695154,
"rewards/frontier_coverage_1": 0.03478905353695154,
"rewards/frontier_coverage_10": 0.03478905353695154,
"rewards/frontier_coverage_15": 0.03478905353695154,
"rewards/frontier_coverage_20": 0.03478905353695154,
"rewards/frontier_coverage_25": 0.03478905353695154,
"rewards/frontier_coverage_5": 0.03478905353695154,
"signal/accuracy_reward/centered_abs_mean": 0.1487250432372093,
"signal/accuracy_reward/group_std_mean": 0.19329376816749572,
"signal/accuracy_reward/group_zero_std_frac": 0.4666666805744171,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07436252161860465,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07436252161860465,
"signal/advantage_abs_mean": 0.08278846144676208,
"signal/advantage_pre_scale_abs_mean": 0.08278846144676208,
"signal/advantage_pre_scale_std": 0.14614979028701783,
"signal/advantage_std": 0.14614979028701783,
"signal/brier_reward/centered_abs_mean": 0.12054271399974822,
"signal/brier_reward/group_std_mean": 0.15707246363162994,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01205427125096321,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01205427125096321,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03356652893126011,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04841043651103973,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003356653032824397,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003356653032824397,
"signal/format_reward/centered_abs_mean": 0.007872178638353943,
"signal/format_reward/group_std_mean": 0.017148750275373457,
"signal/format_reward/group_zero_std_frac": 0.919444465637207,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0039360893191769716,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0039360893191769716,
"signal/frontier_coverage_0/centered_abs_mean": 0.10029449313879013,
"signal/frontier_coverage_0/group_std_mean": 0.1406713530421257,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014342111768200993,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014342111768200993,
"signal/frontier_coverage_1/centered_abs_mean": 0.10029449313879013,
"signal/frontier_coverage_1/group_std_mean": 0.1406713530421257,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014342111768200993,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014342111768200993,
"signal/frontier_coverage_10/centered_abs_mean": 0.10029449313879013,
"signal/frontier_coverage_10/group_std_mean": 0.1406713530421257,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014342111768200993,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014342111768200993,
"signal/frontier_coverage_15/centered_abs_mean": 0.10029449313879013,
"signal/frontier_coverage_15/group_std_mean": 0.1406713530421257,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014342111768200993,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014342111768200993,
"signal/frontier_coverage_20/centered_abs_mean": 0.10029449313879013,
"signal/frontier_coverage_20/group_std_mean": 0.1406713530421257,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014342111768200993,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014342111768200993,
"signal/frontier_coverage_25/centered_abs_mean": 0.10029449313879013,
"signal/frontier_coverage_25/group_std_mean": 0.1406713530421257,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014342111768200993,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014342111768200993,
"signal/frontier_coverage_5/centered_abs_mean": 0.10029449313879013,
"signal/frontier_coverage_5/group_std_mean": 0.1406713530421257,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014342111768200993,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014342111768200993,
"step": 105
},
{
"calibration/aurc": 0.13740946154687023,
"calibration/batch_distribution_entropy": 0.7387166352713965,
"calibration/buffer_distribution_entropy": 0.7875437463831182,
"calibration/confidence_entropy": 0.3854279778031112,
"calibration/coverage@0%": 0.005734620418848168,
"calibration/coverage@1%": 0.005734620418848168,
"calibration/coverage@10%": 0.4128860345212545,
"calibration/coverage@15%": 0.639678830954757,
"calibration/coverage@20%": 0.7715804973821989,
"calibration/coverage@25%": 0.8597158595113438,
"calibration/coverage@30%": 0.9363928883071553,
"calibration/coverage@5%": 0.24802201094509685,
"calibration/ece": 0.10715767769046516,
"calibration/mean_confidence": 0.7670220325744023,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0035590277777777677,
"completions/max_length": 3449.4,
"completions/max_terminated_length": 3449.4,
"completions/mean_length": 713.4057373046875,
"completions/mean_terminated_length": 715.9921508789063,
"completions/min_length": 0.0,
"completions/min_terminated_length": 195.0,
"epoch": 0.2639967000412495,
"grad_norm": 0.0006309704622253776,
"learning_rate": 2.9518072289156627e-06,
"loss": -0.0015,
"num_tokens": 233002726.0,
"reward": 1.0424779415130616,
"reward_std": 0.10481331050395966,
"rewards/accuracy_reward": 0.7316840291023254,
"rewards/brier_reward": 0.8372745990753174,
"rewards/confidence_uniqueness_reward": 0.9280092358589173,
"rewards/format_reward": 0.99609375,
"rewards/frontier_coverage_0": 0.020585645362734793,
"rewards/frontier_coverage_1": 0.020585645362734793,
"rewards/frontier_coverage_10": 0.020585645362734793,
"rewards/frontier_coverage_15": 0.020585645362734793,
"rewards/frontier_coverage_20": 0.020585645362734793,
"rewards/frontier_coverage_25": 0.020585645362734793,
"rewards/frontier_coverage_5": 0.020585645362734793,
"signal/accuracy_reward/centered_abs_mean": 0.13774413764476776,
"signal/accuracy_reward/group_std_mean": 0.1850327730178833,
"signal/accuracy_reward/group_zero_std_frac": 0.45555556416511533,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06887206882238388,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06887206882238388,
"signal/advantage_abs_mean": 0.07664794921875,
"signal/advantage_pre_scale_abs_mean": 0.07664794921875,
"signal/advantage_pre_scale_std": 0.1405678302049637,
"signal/advantage_std": 0.1405678302049637,
"signal/brier_reward/centered_abs_mean": 0.11345363408327103,
"signal/brier_reward/group_std_mean": 0.15164164900779725,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011345363780856132,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011345363780856132,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.033889131247997285,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04748179391026497,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033889132551848888,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033889132551848888,
"signal/format_reward/centered_abs_mean": 0.0070800781715661286,
"signal/format_reward/group_std_mean": 0.014973613433539867,
"signal/format_reward/group_zero_std_frac": 0.9333333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0035400390857830643,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0035400390857830643,
"signal/frontier_coverage_0/centered_abs_mean": 0.10066508799791336,
"signal/frontier_coverage_0/group_std_mean": 0.14207510650157928,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014395107515156268,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014395107515156268,
"signal/frontier_coverage_1/centered_abs_mean": 0.10066508799791336,
"signal/frontier_coverage_1/group_std_mean": 0.14207510650157928,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014395107515156268,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014395107515156268,
"signal/frontier_coverage_10/centered_abs_mean": 0.10066508799791336,
"signal/frontier_coverage_10/group_std_mean": 0.14207510650157928,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014395107515156268,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014395107515156268,
"signal/frontier_coverage_15/centered_abs_mean": 0.10066508799791336,
"signal/frontier_coverage_15/group_std_mean": 0.14207510650157928,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014395107515156268,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014395107515156268,
"signal/frontier_coverage_20/centered_abs_mean": 0.10066508799791336,
"signal/frontier_coverage_20/group_std_mean": 0.14207510650157928,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014395107515156268,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014395107515156268,
"signal/frontier_coverage_25/centered_abs_mean": 0.10066508799791336,
"signal/frontier_coverage_25/group_std_mean": 0.14207510650157928,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014395107515156268,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014395107515156268,
"signal/frontier_coverage_5/centered_abs_mean": 0.10066508799791336,
"signal/frontier_coverage_5/group_std_mean": 0.14207510650157928,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014395107515156268,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014395107515156268,
"step": 110
},
{
"calibration/aurc": 0.2610249615426862,
"calibration/batch_distribution_entropy": 0.8931124788213323,
"calibration/buffer_distribution_entropy": 0.7904711233092072,
"calibration/confidence_entropy": 0.4444131127935872,
"calibration/coverage@0%": 0.0183356728183972,
"calibration/coverage@1%": 0.0183356728183972,
"calibration/coverage@10%": 0.1648454105202722,
"calibration/coverage@15%": 0.23299114257284842,
"calibration/coverage@20%": 0.44790129663351574,
"calibration/coverage@25%": 0.5566237309402716,
"calibration/coverage@30%": 0.6543754294352068,
"calibration/coverage@5%": 0.06807389271368516,
"calibration/ece": 0.20020606774460367,
"calibration/mean_confidence": 0.6489293143319701,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00512152777777779,
"completions/max_length": 3599.4,
"completions/max_terminated_length": 3599.4,
"completions/mean_length": 718.4827392578125,
"completions/mean_terminated_length": 722.2009521484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 187.0,
"epoch": 0.27599655004312446,
"grad_norm": 0.0005341703654266894,
"learning_rate": 2.8012048192771087e-06,
"loss": -0.0034,
"num_tokens": 244358847.0,
"reward": 1.0126498460769653,
"reward_std": 0.10888111293315887,
"rewards/accuracy_reward": 0.6730902791023254,
"rewards/brier_reward": 0.8102145433425904,
"rewards/confidence_uniqueness_reward": 0.9390350937843323,
"rewards/format_reward": 0.9948784708976746,
"rewards/frontier_coverage_0": 0.037367334216833116,
"rewards/frontier_coverage_1": 0.037367334216833116,
"rewards/frontier_coverage_10": 0.037367334216833116,
"rewards/frontier_coverage_15": 0.037367334216833116,
"rewards/frontier_coverage_20": 0.037367334216833116,
"rewards/frontier_coverage_25": 0.037367334216833116,
"rewards/frontier_coverage_5": 0.037367334216833116,
"signal/accuracy_reward/centered_abs_mean": 0.1502712696790695,
"signal/accuracy_reward/group_std_mean": 0.19435304999351502,
"signal/accuracy_reward/group_zero_std_frac": 0.46111111640930175,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07513563483953475,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07513563483953475,
"signal/advantage_abs_mean": 0.08177377581596375,
"signal/advantage_pre_scale_abs_mean": 0.08177377581596375,
"signal/advantage_pre_scale_std": 0.1430963695049286,
"signal/advantage_std": 0.1430963695049286,
"signal/brier_reward/centered_abs_mean": 0.12979107797145845,
"signal/brier_reward/group_std_mean": 0.16742580831050874,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012979108095169067,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012979108095169067,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031050733104348183,
"signal/confidence_uniqueness_reward/group_std_mean": 0.046174564957618715,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00310507332906127,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00310507332906127,
"signal/format_reward/centered_abs_mean": 0.008848741184920073,
"signal/format_reward/group_std_mean": 0.01954154595732689,
"signal/format_reward/group_zero_std_frac": 0.9055555582046508,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004424370592460036,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004424370592460036,
"signal/frontier_coverage_0/centered_abs_mean": 0.13977960646152496,
"signal/frontier_coverage_0/group_std_mean": 0.19083205461502076,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001998848305083811,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001998848305083811,
"signal/frontier_coverage_1/centered_abs_mean": 0.13977960646152496,
"signal/frontier_coverage_1/group_std_mean": 0.19083205461502076,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001998848305083811,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001998848305083811,
"signal/frontier_coverage_10/centered_abs_mean": 0.13977960646152496,
"signal/frontier_coverage_10/group_std_mean": 0.19083205461502076,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001998848305083811,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001998848305083811,
"signal/frontier_coverage_15/centered_abs_mean": 0.13977960646152496,
"signal/frontier_coverage_15/group_std_mean": 0.19083205461502076,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001998848305083811,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001998848305083811,
"signal/frontier_coverage_20/centered_abs_mean": 0.13977960646152496,
"signal/frontier_coverage_20/group_std_mean": 0.19083205461502076,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001998848305083811,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001998848305083811,
"signal/frontier_coverage_25/centered_abs_mean": 0.13977960646152496,
"signal/frontier_coverage_25/group_std_mean": 0.19083205461502076,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001998848305083811,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001998848305083811,
"signal/frontier_coverage_5/centered_abs_mean": 0.13977960646152496,
"signal/frontier_coverage_5/group_std_mean": 0.19083205461502076,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001998848305083811,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001998848305083811,
"step": 115
},
{
"calibration/aurc": 0.2399620098631861,
"calibration/batch_distribution_entropy": 0.9100376139400339,
"calibration/buffer_distribution_entropy": 0.8002528654915221,
"calibration/confidence_entropy": 0.45853235863182074,
"calibration/coverage@0%": 0.08355148342059338,
"calibration/coverage@1%": 0.08355148342059338,
"calibration/coverage@10%": 0.3862074607329843,
"calibration/coverage@15%": 0.47245582460732977,
"calibration/coverage@20%": 0.5226030759162303,
"calibration/coverage@25%": 0.558074280104712,
"calibration/coverage@30%": 0.6717141143106458,
"calibration/coverage@5%": 0.21099476439790577,
"calibration/ece": 0.16576287831460218,
"calibration/mean_confidence": 0.6249067958530972,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006510416666666674,
"completions/max_length": 3755.8,
"completions/max_terminated_length": 3755.8,
"completions/mean_length": 715.130029296875,
"completions/mean_terminated_length": 719.80126953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 202.6,
"epoch": 0.28799640004499943,
"grad_norm": 0.00040327879833057523,
"learning_rate": 2.6506024096385547e-06,
"loss": -0.0054,
"num_tokens": 255679001.0,
"reward": 1.0224328994750977,
"reward_std": 0.10297303348779678,
"rewards/accuracy_reward": 0.6940972328186035,
"rewards/brier_reward": 0.8147106170654297,
"rewards/confidence_uniqueness_reward": 0.9446017503738403,
"rewards/format_reward": 0.9934895753860473,
"rewards/frontier_coverage_0": 0.027055247500538827,
"rewards/frontier_coverage_1": 0.027055247500538827,
"rewards/frontier_coverage_10": 0.027055247500538827,
"rewards/frontier_coverage_15": 0.027055247500538827,
"rewards/frontier_coverage_20": 0.027055247500538827,
"rewards/frontier_coverage_25": 0.027055247500538827,
"rewards/frontier_coverage_5": 0.027055247500538827,
"signal/accuracy_reward/centered_abs_mean": 0.14014756679534912,
"signal/accuracy_reward/group_std_mean": 0.18854482769966124,
"signal/accuracy_reward/group_zero_std_frac": 0.4555555522441864,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07007378339767456,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07007378339767456,
"signal/advantage_abs_mean": 0.07467132210731506,
"signal/advantage_pre_scale_abs_mean": 0.07467132210731506,
"signal/advantage_pre_scale_std": 0.13559393733739852,
"signal/advantage_std": 0.13559393733739852,
"signal/brier_reward/centered_abs_mean": 0.12669853121042252,
"signal/brier_reward/group_std_mean": 0.16605999767780305,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012669852934777737,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012669852934777737,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03032144792377949,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04388071969151497,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030321448110044004,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030321448110044004,
"signal/format_reward/centered_abs_mean": 0.010639105830341578,
"signal/format_reward/group_std_mean": 0.01954000908881426,
"signal/format_reward/group_zero_std_frac": 0.919444465637207,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005319552915170789,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005319552915170789,
"signal/frontier_coverage_0/centered_abs_mean": 0.1568516492843628,
"signal/frontier_coverage_0/group_std_mean": 0.21309016048908233,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022429785691201686,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022429785691201686,
"signal/frontier_coverage_1/centered_abs_mean": 0.1568516492843628,
"signal/frontier_coverage_1/group_std_mean": 0.21309016048908233,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022429785691201686,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022429785691201686,
"signal/frontier_coverage_10/centered_abs_mean": 0.1568516492843628,
"signal/frontier_coverage_10/group_std_mean": 0.21309016048908233,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022429785691201686,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022429785691201686,
"signal/frontier_coverage_15/centered_abs_mean": 0.1568516492843628,
"signal/frontier_coverage_15/group_std_mean": 0.21309016048908233,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022429785691201686,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022429785691201686,
"signal/frontier_coverage_20/centered_abs_mean": 0.1568516492843628,
"signal/frontier_coverage_20/group_std_mean": 0.21309016048908233,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022429785691201686,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022429785691201686,
"signal/frontier_coverage_25/centered_abs_mean": 0.1568516492843628,
"signal/frontier_coverage_25/group_std_mean": 0.21309016048908233,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022429785691201686,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022429785691201686,
"signal/frontier_coverage_5/centered_abs_mean": 0.1568516492843628,
"signal/frontier_coverage_5/group_std_mean": 0.21309016048908233,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022429785691201686,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022429785691201686,
"step": 120
},
{
"calibration/aurc": 0.149045402492001,
"calibration/batch_distribution_entropy": 0.7272813670961927,
"calibration/buffer_distribution_entropy": 0.8062674007120527,
"calibration/confidence_entropy": 0.3572362759071208,
"calibration/coverage@0%": 0.033903769841269846,
"calibration/coverage@1%": 0.033903769841269846,
"calibration/coverage@10%": 0.38658952919723155,
"calibration/coverage@15%": 0.5611592033099867,
"calibration/coverage@20%": 0.7252933019741113,
"calibration/coverage@25%": 0.8832031735670769,
"calibration/coverage@30%": 0.9783813909956208,
"calibration/coverage@5%": 0.08807043650793651,
"calibration/ece": 0.12568334858621172,
"calibration/mean_confidence": 0.7540773271048653,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0043402777777777676,
"completions/max_length": 3024.4,
"completions/max_terminated_length": 3024.4,
"completions/mean_length": 711.7556518554687,
"completions/mean_terminated_length": 714.9013427734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 211.0,
"epoch": 0.2999962500468744,
"grad_norm": 0.0005117288092151284,
"learning_rate": 2.5e-06,
"loss": -0.005,
"num_tokens": 266996090.0,
"reward": 1.029345703125,
"reward_std": 0.109001125395298,
"rewards/accuracy_reward": 0.7084201455116272,
"rewards/brier_reward": 0.823661994934082,
"rewards/confidence_uniqueness_reward": 0.9209244966506958,
"rewards/format_reward": 0.9956597208976745,
"rewards/frontier_coverage_0": 0.02844259552657604,
"rewards/frontier_coverage_1": 0.02844259552657604,
"rewards/frontier_coverage_10": 0.02844259552657604,
"rewards/frontier_coverage_15": 0.02844259552657604,
"rewards/frontier_coverage_20": 0.02844259552657604,
"rewards/frontier_coverage_25": 0.02844259552657604,
"rewards/frontier_coverage_5": 0.02844259552657604,
"signal/accuracy_reward/centered_abs_mean": 0.14586045742034912,
"signal/accuracy_reward/group_std_mean": 0.19265780448913575,
"signal/accuracy_reward/group_zero_std_frac": 0.4555555582046509,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07293022871017456,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07293022871017456,
"signal/advantage_abs_mean": 0.08043320327997208,
"signal/advantage_pre_scale_abs_mean": 0.08043320327997208,
"signal/advantage_pre_scale_std": 0.14412462413311006,
"signal/advantage_std": 0.14412462413311006,
"signal/brier_reward/centered_abs_mean": 0.12533471435308458,
"signal/brier_reward/group_std_mean": 0.166182804107666,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01253347136080265,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01253347136080265,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04015061669051647,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05653024539351463,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004015061818063259,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004015061818063259,
"signal/format_reward/centered_abs_mean": 0.007953558850567788,
"signal/format_reward/group_std_mean": 0.01796704400330782,
"signal/format_reward/group_zero_std_frac": 0.9138888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003976779425283894,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.003976779425283894,
"signal/frontier_coverage_0/centered_abs_mean": 0.11626611351966858,
"signal/frontier_coverage_0/group_std_mean": 0.16440567672252654,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016626053722575307,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016626053722575307,
"signal/frontier_coverage_1/centered_abs_mean": 0.11626611351966858,
"signal/frontier_coverage_1/group_std_mean": 0.16440567672252654,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016626053722575307,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016626053722575307,
"signal/frontier_coverage_10/centered_abs_mean": 0.11626611351966858,
"signal/frontier_coverage_10/group_std_mean": 0.16440567672252654,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016626053722575307,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016626053722575307,
"signal/frontier_coverage_15/centered_abs_mean": 0.11626611351966858,
"signal/frontier_coverage_15/group_std_mean": 0.16440567672252654,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016626053722575307,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016626053722575307,
"signal/frontier_coverage_20/centered_abs_mean": 0.11626611351966858,
"signal/frontier_coverage_20/group_std_mean": 0.16440567672252654,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016626053722575307,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016626053722575307,
"signal/frontier_coverage_25/centered_abs_mean": 0.11626611351966858,
"signal/frontier_coverage_25/group_std_mean": 0.16440567672252654,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016626053722575307,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016626053722575307,
"signal/frontier_coverage_5/centered_abs_mean": 0.11626611351966858,
"signal/frontier_coverage_5/group_std_mean": 0.16440567672252654,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016626053722575307,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016626053722575307,
"step": 125
},
{
"calibration/aurc": 0.2498398374245417,
"calibration/batch_distribution_entropy": 0.5941785632997885,
"calibration/buffer_distribution_entropy": 0.8031562502076233,
"calibration/confidence_entropy": 0.2781564688238765,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.06931798188510341,
"calibration/coverage@15%": 0.21769957293228143,
"calibration/coverage@20%": 0.41865692715334324,
"calibration/coverage@25%": 0.576949097598052,
"calibration/coverage@30%": 0.82877011358758,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.21146582076672024,
"calibration/mean_confidence": 0.8089417351778607,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011979166666666674,
"completions/max_length": 3381.8,
"completions/max_terminated_length": 3381.8,
"completions/mean_length": 746.9890747070312,
"completions/mean_terminated_length": 756.1432006835937,
"completions/min_length": 0.0,
"completions/min_terminated_length": 188.8,
"epoch": 0.3119961000487494,
"grad_norm": 0.0005842032842338085,
"learning_rate": 2.349397590361446e-06,
"loss": -0.0114,
"num_tokens": 278726204.0,
"reward": 0.9973063111305237,
"reward_std": 0.13256770521402358,
"rewards/accuracy_reward": 0.6662326455116272,
"rewards/brier_reward": 0.7879875302314758,
"rewards/confidence_uniqueness_reward": 0.8794443368911743,
"rewards/format_reward": 0.9878472089767456,
"rewards/frontier_coverage_0": 0.03522439245134592,
"rewards/frontier_coverage_1": 0.03522439245134592,
"rewards/frontier_coverage_10": 0.03522439245134592,
"rewards/frontier_coverage_15": 0.03522439245134592,
"rewards/frontier_coverage_20": 0.03522439245134592,
"rewards/frontier_coverage_25": 0.03502925205975771,
"rewards/frontier_coverage_5": 0.03522439245134592,
"signal/accuracy_reward/centered_abs_mean": 0.1634385883808136,
"signal/accuracy_reward/group_std_mean": 0.21186422407627106,
"signal/accuracy_reward/group_zero_std_frac": 0.4138888895511627,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0817192941904068,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0817192941904068,
"signal/advantage_abs_mean": 0.09865063428878784,
"signal/advantage_pre_scale_abs_mean": 0.09865063428878784,
"signal/advantage_pre_scale_std": 0.17027110159397124,
"signal/advantage_std": 0.17027110159397124,
"signal/brier_reward/centered_abs_mean": 0.1411220982670784,
"signal/brier_reward/group_std_mean": 0.18561237156391144,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01411221083253622,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01411221083253622,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06204545646905899,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08459463864564895,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006204545777291059,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006204545777291059,
"signal/format_reward/centered_abs_mean": 0.01944444477558136,
"signal/format_reward/group_std_mean": 0.03500307872891426,
"signal/format_reward/group_zero_std_frac": 0.8583333253860473,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00972222238779068,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00972222238779068,
"signal/frontier_coverage_0/centered_abs_mean": 0.08938175737857819,
"signal/frontier_coverage_0/group_std_mean": 0.13144375383853912,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012781591154634952,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012781591154634952,
"signal/frontier_coverage_1/centered_abs_mean": 0.08938175737857819,
"signal/frontier_coverage_1/group_std_mean": 0.13144375383853912,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012781591154634952,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012781591154634952,
"signal/frontier_coverage_10/centered_abs_mean": 0.08938175737857819,
"signal/frontier_coverage_10/group_std_mean": 0.13144375383853912,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012781591154634952,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012781591154634952,
"signal/frontier_coverage_15/centered_abs_mean": 0.08938175737857819,
"signal/frontier_coverage_15/group_std_mean": 0.13144375383853912,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012781591154634952,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012781591154634952,
"signal/frontier_coverage_20/centered_abs_mean": 0.08938175737857819,
"signal/frontier_coverage_20/group_std_mean": 0.13144375383853912,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012781591154634952,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012781591154634952,
"signal/frontier_coverage_25/centered_abs_mean": 0.08872648626565934,
"signal/frontier_coverage_25/group_std_mean": 0.13051423877477647,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012687887530773879,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012687887530773879,
"signal/frontier_coverage_5/centered_abs_mean": 0.08938175737857819,
"signal/frontier_coverage_5/group_std_mean": 0.13144375383853912,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012781591154634952,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012781591154634952,
"step": 130
},
{
"calibration/aurc": 0.20665346427154546,
"calibration/batch_distribution_entropy": 0.5376576985579862,
"calibration/buffer_distribution_entropy": 0.7984151845186054,
"calibration/confidence_entropy": 0.27916214521537636,
"calibration/coverage@0%": 0.06096666666666667,
"calibration/coverage@1%": 0.07971666666666667,
"calibration/coverage@10%": 0.2754303664921466,
"calibration/coverage@15%": 0.4312977312390925,
"calibration/coverage@20%": 0.4989975567190227,
"calibration/coverage@25%": 0.6582754683107096,
"calibration/coverage@30%": 0.7717496206580728,
"calibration/coverage@5%": 0.2133375,
"calibration/ece": 0.17591854873155563,
"calibration/mean_confidence": 0.8414116772058632,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.007986111111111093,
"completions/max_length": 3697.2,
"completions/max_terminated_length": 3697.2,
"completions/mean_length": 741.0501708984375,
"completions/mean_terminated_length": 747.028369140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 203.0,
"epoch": 0.32399595005062437,
"grad_norm": 0.0005643125041387975,
"learning_rate": 2.1987951807228917e-06,
"loss": -0.0065,
"num_tokens": 290356126.0,
"reward": 1.0172087907791139,
"reward_std": 0.11705570220947266,
"rewards/accuracy_reward": 0.7008680462837219,
"rewards/brier_reward": 0.8044859409332276,
"rewards/confidence_uniqueness_reward": 0.8768606543540954,
"rewards/format_reward": 0.9919270873069763,
"rewards/frontier_coverage_0": 0.026983942463994028,
"rewards/frontier_coverage_1": 0.026983942463994028,
"rewards/frontier_coverage_10": 0.026983942463994028,
"rewards/frontier_coverage_15": 0.026983942463994028,
"rewards/frontier_coverage_20": 0.026983942463994028,
"rewards/frontier_coverage_25": 0.025268368422985077,
"rewards/frontier_coverage_5": 0.026983942463994028,
"signal/accuracy_reward/centered_abs_mean": 0.1438259571790695,
"signal/accuracy_reward/group_std_mean": 0.19028924107551576,
"signal/accuracy_reward/group_zero_std_frac": 0.4638888895511627,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07191297858953476,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07191297858953476,
"signal/advantage_abs_mean": 0.087016960978508,
"signal/advantage_pre_scale_abs_mean": 0.087016960978508,
"signal/advantage_pre_scale_std": 0.15853114426136017,
"signal/advantage_std": 0.15853114426136017,
"signal/brier_reward/centered_abs_mean": 0.1297492727637291,
"signal/brier_reward/group_std_mean": 0.17122452557086945,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012974927946925164,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012974927946925164,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05996449142694473,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07999310791492462,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0059964492917060856,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0059964492917060856,
"signal/format_reward/centered_abs_mean": 0.01330837681889534,
"signal/format_reward/group_std_mean": 0.02422129511833191,
"signal/format_reward/group_zero_std_frac": 0.9000000119209289,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00665418840944767,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00665418840944767,
"signal/frontier_coverage_0/centered_abs_mean": 0.07249849885702134,
"signal/frontier_coverage_0/group_std_mean": 0.11098081171512604,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0010367285343818367,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0010367285343818367,
"signal/frontier_coverage_1/centered_abs_mean": 0.07249849885702134,
"signal/frontier_coverage_1/group_std_mean": 0.11098081171512604,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0010367285343818367,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0010367285343818367,
"signal/frontier_coverage_10/centered_abs_mean": 0.07249849885702134,
"signal/frontier_coverage_10/group_std_mean": 0.11098081171512604,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0010367285343818367,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010367285343818367,
"signal/frontier_coverage_15/centered_abs_mean": 0.07249849885702134,
"signal/frontier_coverage_15/group_std_mean": 0.11098081171512604,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010367285343818367,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010367285343818367,
"signal/frontier_coverage_20/centered_abs_mean": 0.07249849885702134,
"signal/frontier_coverage_20/group_std_mean": 0.11098081171512604,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010367285343818367,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010367285343818367,
"signal/frontier_coverage_25/centered_abs_mean": 0.06090174987912178,
"signal/frontier_coverage_25/group_std_mean": 0.09414784163236618,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008708950132131577,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008708950132131577,
"signal/frontier_coverage_5/centered_abs_mean": 0.07249849885702134,
"signal/frontier_coverage_5/group_std_mean": 0.11098081171512604,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0010367285343818367,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0010367285343818367,
"step": 135
},
{
"calibration/aurc": 0.13807296148494613,
"calibration/batch_distribution_entropy": 0.5912922286361493,
"calibration/buffer_distribution_entropy": 0.7990959992248101,
"calibration/confidence_entropy": 0.3053410751404527,
"calibration/coverage@0%": 0.018230563002680965,
"calibration/coverage@1%": 0.018230563002680965,
"calibration/coverage@10%": 0.47200026952869323,
"calibration/coverage@15%": 0.6143298179119846,
"calibration/coverage@20%": 0.7965660023121732,
"calibration/coverage@25%": 0.9006813786793784,
"calibration/coverage@30%": 0.9573407557430865,
"calibration/coverage@5%": 0.19797422633505762,
"calibration/ece": 0.15729955684106034,
"calibration/mean_confidence": 0.827820182834459,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011197916666666674,
"completions/max_length": 3481.4,
"completions/max_terminated_length": 3481.4,
"completions/mean_length": 742.7499145507812,
"completions/mean_terminated_length": 751.1475830078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 217.0,
"epoch": 0.33599580005249935,
"grad_norm": 0.0004445806553121656,
"learning_rate": 2.0481927710843377e-06,
"loss": -0.0094,
"num_tokens": 302016829.0,
"reward": 1.0130672454833984,
"reward_std": 0.1154438465833664,
"rewards/accuracy_reward": 0.6948784708976745,
"rewards/brier_reward": 0.8031974673271179,
"rewards/confidence_uniqueness_reward": 0.883224892616272,
"rewards/format_reward": 0.9887152791023255,
"rewards/frontier_coverage_0": 0.026858755480498077,
"rewards/frontier_coverage_1": 0.026858755480498077,
"rewards/frontier_coverage_10": 0.026858755480498077,
"rewards/frontier_coverage_15": 0.026858755480498077,
"rewards/frontier_coverage_20": 0.026858755480498077,
"rewards/frontier_coverage_25": 0.02263545459136367,
"rewards/frontier_coverage_5": 0.026858755480498077,
"signal/accuracy_reward/centered_abs_mean": 0.12689344882965087,
"signal/accuracy_reward/group_std_mean": 0.17651031613349916,
"signal/accuracy_reward/group_zero_std_frac": 0.4694444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06344672441482543,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06344672441482543,
"signal/advantage_abs_mean": 0.08137730211019516,
"signal/advantage_pre_scale_abs_mean": 0.08137730211019516,
"signal/advantage_pre_scale_std": 0.1559952199459076,
"signal/advantage_std": 0.1559952199459076,
"signal/brier_reward/centered_abs_mean": 0.12170794308185577,
"signal/brier_reward/group_std_mean": 0.16413157284259797,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0121707946062088,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.0121707946062088,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05902692675590515,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08175166472792625,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005902692675590515,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005902692675590515,
"signal/format_reward/centered_abs_mean": 0.016840277798473836,
"signal/format_reward/group_std_mean": 0.03128794245421886,
"signal/format_reward/group_zero_std_frac": 0.8750000119209289,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008420138899236918,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008420138899236918,
"signal/frontier_coverage_0/centered_abs_mean": 0.06246669292449951,
"signal/frontier_coverage_0/group_std_mean": 0.09620369970798492,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0008932736935093998,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0008932736935093998,
"signal/frontier_coverage_1/centered_abs_mean": 0.06246669292449951,
"signal/frontier_coverage_1/group_std_mean": 0.09620369970798492,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0008932736935093998,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0008932736935093998,
"signal/frontier_coverage_10/centered_abs_mean": 0.06246669292449951,
"signal/frontier_coverage_10/group_std_mean": 0.09620369970798492,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0008932736935093998,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008932736935093998,
"signal/frontier_coverage_15/centered_abs_mean": 0.06246669292449951,
"signal/frontier_coverage_15/group_std_mean": 0.09620369970798492,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008932736935093998,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008932736935093998,
"signal/frontier_coverage_20/centered_abs_mean": 0.06246669292449951,
"signal/frontier_coverage_20/group_std_mean": 0.09620369970798492,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008932736935093998,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008932736935093998,
"signal/frontier_coverage_25/centered_abs_mean": 0.045351064205169676,
"signal/frontier_coverage_25/group_std_mean": 0.07125220820307732,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006485202291514724,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006485202291514724,
"signal/frontier_coverage_5/centered_abs_mean": 0.06246669292449951,
"signal/frontier_coverage_5/group_std_mean": 0.09620369970798492,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0008932736935093998,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0008932736935093998,
"step": 140
},
{
"calibration/aurc": 0.15873093506050334,
"calibration/batch_distribution_entropy": 0.7353718083783116,
"calibration/buffer_distribution_entropy": 0.8072198353385431,
"calibration/confidence_entropy": 0.35873181163031825,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.06219839142091153,
"calibration/coverage@10%": 0.39977334249221996,
"calibration/coverage@15%": 0.5484208791798558,
"calibration/coverage@20%": 0.7012722080481055,
"calibration/coverage@25%": 0.7817749343832021,
"calibration/coverage@30%": 0.8312116584385285,
"calibration/coverage@5%": 0.2308831483301228,
"calibration/ece": 0.16048376103259138,
"calibration/mean_confidence": 0.7810990624058293,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011197916666666651,
"completions/max_length": 3506.4,
"completions/max_terminated_length": 3506.4,
"completions/mean_length": 735.0325561523438,
"completions/mean_terminated_length": 743.3595581054688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 229.2,
"epoch": 0.34799565005437433,
"grad_norm": 0.0003804602602031082,
"learning_rate": 1.8975903614457832e-06,
"loss": -0.0092,
"num_tokens": 313549012.0,
"reward": 1.0284262537956237,
"reward_std": 0.11307135671377182,
"rewards/accuracy_reward": 0.7163194417953491,
"rewards/brier_reward": 0.8276101469993591,
"rewards/confidence_uniqueness_reward": 0.9076523542404175,
"rewards/format_reward": 0.9887152791023255,
"rewards/frontier_coverage_0": 0.02373262830078602,
"rewards/frontier_coverage_1": 0.02373262830078602,
"rewards/frontier_coverage_10": 0.02373262830078602,
"rewards/frontier_coverage_15": 0.02373262830078602,
"rewards/frontier_coverage_20": 0.023653368651866912,
"rewards/frontier_coverage_25": 0.024303621798753738,
"rewards/frontier_coverage_5": 0.02373262830078602,
"signal/accuracy_reward/centered_abs_mean": 0.13071831464767455,
"signal/accuracy_reward/group_std_mean": 0.17358810007572173,
"signal/accuracy_reward/group_zero_std_frac": 0.49444445967674255,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06535915732383728,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06535915732383728,
"signal/advantage_abs_mean": 0.08121174573898315,
"signal/advantage_pre_scale_abs_mean": 0.08121174573898315,
"signal/advantage_pre_scale_std": 0.15750052332878112,
"signal/advantage_std": 0.15750052332878112,
"signal/brier_reward/centered_abs_mean": 0.11548198312520981,
"signal/brier_reward/group_std_mean": 0.1539760112762451,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011548198573291302,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011548198573291302,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0547367163002491,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07798558920621872,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005473671574145555,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005473671574145555,
"signal/format_reward/centered_abs_mean": 0.019759114272892474,
"signal/format_reward/group_std_mean": 0.0363234143704176,
"signal/format_reward/group_zero_std_frac": 0.8555555701255798,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009879557136446237,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009879557136446237,
"signal/frontier_coverage_0/centered_abs_mean": 0.07713357806205749,
"signal/frontier_coverage_0/group_std_mean": 0.10995967090129852,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0011030101682990789,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0011030101682990789,
"signal/frontier_coverage_1/centered_abs_mean": 0.07713357806205749,
"signal/frontier_coverage_1/group_std_mean": 0.10995967090129852,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0011030101682990789,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0011030101682990789,
"signal/frontier_coverage_10/centered_abs_mean": 0.07713357806205749,
"signal/frontier_coverage_10/group_std_mean": 0.10995967090129852,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0011030101682990789,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011030101682990789,
"signal/frontier_coverage_15/centered_abs_mean": 0.07713357806205749,
"signal/frontier_coverage_15/group_std_mean": 0.10995967090129852,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011030101682990789,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011030101682990789,
"signal/frontier_coverage_20/centered_abs_mean": 0.06395274251699448,
"signal/frontier_coverage_20/group_std_mean": 0.09226414263248443,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009145242162048817,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009145242162048817,
"signal/frontier_coverage_25/centered_abs_mean": 0.04583085030317306,
"signal/frontier_coverage_25/group_std_mean": 0.06589328721165658,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006553811486810446,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006553811486810446,
"signal/frontier_coverage_5/centered_abs_mean": 0.07713357806205749,
"signal/frontier_coverage_5/group_std_mean": 0.10995967090129852,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0011030101682990789,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0011030101682990789,
"step": 145
},
{
"calibration/aurc": 0.19848453530632176,
"calibration/batch_distribution_entropy": 0.8353200249126764,
"calibration/buffer_distribution_entropy": 0.8180778954803165,
"calibration/confidence_entropy": 0.4378518057416948,
"calibration/coverage@0%": 0.014285714285714285,
"calibration/coverage@1%": 0.014285714285714285,
"calibration/coverage@10%": 0.3476190476190476,
"calibration/coverage@15%": 0.39187699448796054,
"calibration/coverage@20%": 0.5605820490577851,
"calibration/coverage@25%": 0.6765151459405365,
"calibration/coverage@30%": 0.7895631984795912,
"calibration/coverage@5%": 0.16589781746031745,
"calibration/ece": 0.14407072392639744,
"calibration/mean_confidence": 0.7014389534257306,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.007986111111111116,
"completions/max_length": 3675.6,
"completions/max_terminated_length": 3675.6,
"completions/mean_length": 805.659130859375,
"completions/mean_terminated_length": 812.1679321289063,
"completions/min_length": 0.0,
"completions/min_terminated_length": 217.2,
"epoch": 0.3599955000562493,
"grad_norm": 0.0004782957839779556,
"learning_rate": 1.7469879518072292e-06,
"loss": -0.0064,
"num_tokens": 325940541.0,
"reward": 1.022280216217041,
"reward_std": 0.11486477702856064,
"rewards/accuracy_reward": 0.69921875,
"rewards/brier_reward": 0.8172156095504761,
"rewards/confidence_uniqueness_reward": 0.9295577049255371,
"rewards/format_reward": 0.9919270873069763,
"rewards/frontier_coverage_0": 0.018185996543616058,
"rewards/frontier_coverage_1": 0.018185996543616058,
"rewards/frontier_coverage_10": 0.018185996543616058,
"rewards/frontier_coverage_15": 0.019136503525078296,
"rewards/frontier_coverage_20": 0.02075750511139631,
"rewards/frontier_coverage_25": 0.02931526005268097,
"rewards/frontier_coverage_5": 0.018185996543616058,
"signal/accuracy_reward/centered_abs_mean": 0.14734700322151184,
"signal/accuracy_reward/group_std_mean": 0.19579098820686341,
"signal/accuracy_reward/group_zero_std_frac": 0.43611112236976624,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07367350161075592,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07367350161075592,
"signal/advantage_abs_mean": 0.08442184329032898,
"signal/advantage_pre_scale_abs_mean": 0.08442184329032898,
"signal/advantage_pre_scale_std": 0.15249321460723878,
"signal/advantage_std": 0.15249321460723878,
"signal/brier_reward/centered_abs_mean": 0.11737381666898727,
"signal/brier_reward/group_std_mean": 0.15464959442615508,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011737381666898727,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011737381666898727,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04265345185995102,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05998752787709236,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004265345307067037,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004265345307067037,
"signal/format_reward/centered_abs_mean": 0.01388346366584301,
"signal/format_reward/group_std_mean": 0.02453953940421343,
"signal/format_reward/group_zero_std_frac": 0.9055555582046508,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006941731832921505,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006941731832921505,
"signal/frontier_coverage_0/centered_abs_mean": 0.10197426676750183,
"signal/frontier_coverage_0/group_std_mean": 0.14345729649066924,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001458232058212161,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001458232058212161,
"signal/frontier_coverage_1/centered_abs_mean": 0.10197426676750183,
"signal/frontier_coverage_1/group_std_mean": 0.14345729649066924,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001458232058212161,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001458232058212161,
"signal/frontier_coverage_10/centered_abs_mean": 0.10197426676750183,
"signal/frontier_coverage_10/group_std_mean": 0.14345729649066924,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001458232058212161,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001458232058212161,
"signal/frontier_coverage_15/centered_abs_mean": 0.0956965520977974,
"signal/frontier_coverage_15/group_std_mean": 0.1353534460067749,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001368460664525628,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001368460664525628,
"signal/frontier_coverage_20/centered_abs_mean": 0.06615661978721618,
"signal/frontier_coverage_20/group_std_mean": 0.09637952744960784,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009460396599024534,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009460396599024534,
"signal/frontier_coverage_25/centered_abs_mean": 0.04605967253446579,
"signal/frontier_coverage_25/group_std_mean": 0.06603193655610085,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006586533272638917,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006586533272638917,
"signal/frontier_coverage_5/centered_abs_mean": 0.10197426676750183,
"signal/frontier_coverage_5/group_std_mean": 0.14345729649066924,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001458232058212161,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001458232058212161,
"step": 150
},
{
"epoch": 0.3599955000562493,
"eval_calibration/aurc": 0.14445637044652185,
"eval_calibration/batch_distribution_entropy": 0.8076510952162764,
"eval_calibration/buffer_distribution_entropy": 0.8268265736598052,
"eval_calibration/confidence_entropy": 0.4869395872369577,
"eval_calibration/coverage@0%": 0.1986111111111111,
"eval_calibration/coverage@1%": 0.1986111111111111,
"eval_calibration/coverage@10%": 0.4069444444444444,
"eval_calibration/coverage@15%": 0.49548611111111107,
"eval_calibration/coverage@20%": 0.8232638888888889,
"eval_calibration/coverage@25%": 0.9253472222222223,
"eval_calibration/coverage@30%": 0.9722222222222223,
"eval_calibration/coverage@5%": 0.2767361111111111,
"eval_calibration/ece": 0.16583333333333333,
"eval_calibration/mean_confidence": 0.7061388888888889,
"eval_completions/clipped_ratio": 0.013888888888888876,
"eval_completions/max_length": 2670.0,
"eval_completions/max_terminated_length": 2670.0,
"eval_completions/mean_length": 768.7396036783854,
"eval_completions/mean_terminated_length": 779.64697265625,
"eval_completions/min_length": 55.166666666666664,
"eval_completions/min_terminated_length": 256.3333333333333,
"eval_loss": 0.0,
"eval_num_tokens": 325940541.0,
"eval_reward": 1.0079765021800995,
"eval_reward_std": 0.2547302494446437,
"eval_rewards/accuracy_reward": 0.6840277711550394,
"eval_rewards/brier_reward": 0.8177569707234701,
"eval_rewards/confidence_uniqueness_reward": 0.8829086720943451,
"eval_rewards/format_reward": 0.9861111044883728,
"eval_rewards/frontier_coverage_0": 0.02693510102108121,
"eval_rewards/frontier_coverage_1": 0.02693510102108121,
"eval_rewards/frontier_coverage_10": 0.02693510102108121,
"eval_rewards/frontier_coverage_15": 0.027094673210134108,
"eval_rewards/frontier_coverage_20": 0.028464287829895813,
"eval_rewards/frontier_coverage_25": 0.03533406959225734,
"eval_rewards/frontier_coverage_5": 0.02693510102108121,
"eval_runtime": 196.1658,
"eval_samples_per_second": 5.098,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4182942708333333,
"eval_signal/accuracy_reward/group_std_mean": 0.4631828914086024,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20914713541666666,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20914713541666666,
"eval_signal/advantage_abs_mean": 0.2190506507953008,
"eval_signal/advantage_pre_scale_abs_mean": 0.2190506507953008,
"eval_signal/advantage_pre_scale_std": 0.25378915170828503,
"eval_signal/advantage_std": 0.25378915170828503,
"eval_signal/brier_reward/centered_abs_mean": 0.17790956050157547,
"eval_signal/brier_reward/group_std_mean": 0.23551630725463232,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017790956267466147,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.017790956267466147,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05993440312643846,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1008106352140506,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005993440669650833,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005993440669650833,
"eval_signal/format_reward/centered_abs_mean": 0.026692708022892475,
"eval_signal/format_reward/group_std_mean": 0.07258860394358635,
"eval_signal/format_reward/group_zero_std_frac": 0.6111111243565878,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013346354011446238,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.013346354011446238,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.16356521099805832,
"eval_signal/frontier_coverage_0/group_std_mean": 0.2565693234403928,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002338982536457479,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002338982536457479,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.16356521099805832,
"eval_signal/frontier_coverage_1/group_std_mean": 0.2565693234403928,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002338982536457479,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002338982536457479,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.16356521099805832,
"eval_signal/frontier_coverage_10/group_std_mean": 0.2565693234403928,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002338982536457479,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002338982536457479,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.14471895496050516,
"eval_signal/frontier_coverage_15/group_std_mean": 0.232371523976326,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002069481047025571,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002069481047025571,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.09058589860796928,
"eval_signal/frontier_coverage_20/group_std_mean": 0.1578061431646347,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012953783734701574,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012953783734701574,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.05932698274652163,
"eval_signal/frontier_coverage_25/group_std_mean": 0.09397069240609805,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008483757652963201,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008483757652963201,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.16356521099805832,
"eval_signal/frontier_coverage_5/group_std_mean": 0.2565693234403928,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002338982536457479,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002338982536457479,
"eval_steps_per_second": 0.031,
"step": 150
},
{
"calibration/aurc": 0.13392662799561467,
"calibration/batch_distribution_entropy": 0.8691829933423222,
"calibration/buffer_distribution_entropy": 0.831859476416979,
"calibration/confidence_entropy": 0.48191132883935534,
"calibration/coverage@0%": 0.018617021276595744,
"calibration/coverage@1%": 0.07606382978723404,
"calibration/coverage@10%": 0.4882390495713171,
"calibration/coverage@15%": 0.6851489520838969,
"calibration/coverage@20%": 0.7909027950912767,
"calibration/coverage@25%": 0.8724092080113021,
"calibration/coverage@30%": 0.9416853652455746,
"calibration/coverage@5%": 0.17503898852623373,
"calibration/ece": 0.1028043887841402,
"calibration/mean_confidence": 0.693157207237826,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.007812500000000023,
"completions/max_length": 3306.4,
"completions/max_terminated_length": 3306.4,
"completions/mean_length": 744.222314453125,
"completions/mean_terminated_length": 750.1518798828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 228.2,
"epoch": 0.3719953500581243,
"grad_norm": 0.0004585942951962352,
"learning_rate": 1.5963855421686747e-06,
"loss": -0.0064,
"num_tokens": 337621694.0,
"reward": 1.0490242481231689,
"reward_std": 0.10810918956995011,
"rewards/accuracy_reward": 0.7472222328186036,
"rewards/brier_reward": 0.8391157627105713,
"rewards/confidence_uniqueness_reward": 0.9409691214561462,
"rewards/format_reward": 0.9921006798744202,
"rewards/frontier_coverage_0": 0.005688181053847075,
"rewards/frontier_coverage_1": 0.005688181053847075,
"rewards/frontier_coverage_10": 0.005688181053847075,
"rewards/frontier_coverage_15": 0.010548211727291345,
"rewards/frontier_coverage_20": 0.018851665779948235,
"rewards/frontier_coverage_25": 0.04255228638648987,
"rewards/frontier_coverage_5": 0.005688181053847075,
"signal/accuracy_reward/centered_abs_mean": 0.1384982645511627,
"signal/accuracy_reward/group_std_mean": 0.18303903341293334,
"signal/accuracy_reward/group_zero_std_frac": 0.47777777910232544,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06924913227558135,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06924913227558135,
"signal/advantage_abs_mean": 0.07813455611467361,
"signal/advantage_pre_scale_abs_mean": 0.07813455611467361,
"signal/advantage_pre_scale_std": 0.14875202775001525,
"signal/advantage_std": 0.14875202775001525,
"signal/brier_reward/centered_abs_mean": 0.10786259174346924,
"signal/brier_reward/group_std_mean": 0.14233016669750215,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01078625936061144,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01078625936061144,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03370047435164451,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05102566778659821,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003370047500357032,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003370047500357032,
"signal/format_reward/centered_abs_mean": 0.013829210214316845,
"signal/format_reward/group_std_mean": 0.026308896765112878,
"signal/format_reward/group_zero_std_frac": 0.8916666746139527,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006914605107158423,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006914605107158423,
"signal/frontier_coverage_0/centered_abs_mean": 0.11292163282632828,
"signal/frontier_coverage_0/group_std_mean": 0.15748831629753113,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001614779350347817,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001614779350347817,
"signal/frontier_coverage_1/centered_abs_mean": 0.11292163282632828,
"signal/frontier_coverage_1/group_std_mean": 0.15748831629753113,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001614779350347817,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001614779350347817,
"signal/frontier_coverage_10/centered_abs_mean": 0.11292163282632828,
"signal/frontier_coverage_10/group_std_mean": 0.15748831629753113,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001614779350347817,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001614779350347817,
"signal/frontier_coverage_15/centered_abs_mean": 0.09441954791545867,
"signal/frontier_coverage_15/group_std_mean": 0.13332569301128389,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013501995243132114,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013501995243132114,
"signal/frontier_coverage_20/centered_abs_mean": 0.05975788086652756,
"signal/frontier_coverage_20/group_std_mean": 0.08709384799003601,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008545376709662378,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008545376709662378,
"signal/frontier_coverage_25/centered_abs_mean": 0.04686888232827187,
"signal/frontier_coverage_25/group_std_mean": 0.06351146027445793,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006702250568196178,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006702250568196178,
"signal/frontier_coverage_5/centered_abs_mean": 0.11292163282632828,
"signal/frontier_coverage_5/group_std_mean": 0.15748831629753113,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001614779350347817,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001614779350347817,
"step": 155
},
{
"calibration/aurc": 0.1388528080720422,
"calibration/batch_distribution_entropy": 0.7976930988131808,
"calibration/buffer_distribution_entropy": 0.8404623309625426,
"calibration/confidence_entropy": 0.440109510855466,
"calibration/coverage@0%": 0.030789157940663177,
"calibration/coverage@1%": 0.030789157940663177,
"calibration/coverage@10%": 0.604611147469459,
"calibration/coverage@15%": 0.7060672993019198,
"calibration/coverage@20%": 0.7723958333333333,
"calibration/coverage@25%": 0.8401467678100264,
"calibration/coverage@30%": 0.8591029023746704,
"calibration/coverage@5%": 0.2589359729493892,
"calibration/ece": 0.12554189453879194,
"calibration/mean_confidence": 0.7421448630163099,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010243055555555557,
"completions/max_length": 3067.2,
"completions/max_terminated_length": 3067.2,
"completions/mean_length": 750.00947265625,
"completions/mean_terminated_length": 757.798974609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 236.6,
"epoch": 0.38399520005999926,
"grad_norm": 0.0004149152955505997,
"learning_rate": 1.4457831325301204e-06,
"loss": -0.0077,
"num_tokens": 349349099.0,
"reward": 1.0139774680137634,
"reward_std": 0.10905950218439102,
"rewards/accuracy_reward": 0.6820312380790711,
"rewards/brier_reward": 0.8137630939483642,
"rewards/confidence_uniqueness_reward": 0.9368009090423584,
"rewards/format_reward": 0.9897569417953491,
"rewards/frontier_coverage_0": 0.025845300406217575,
"rewards/frontier_coverage_1": 0.025845300406217575,
"rewards/frontier_coverage_10": 0.025845300406217575,
"rewards/frontier_coverage_15": 0.025336899049580097,
"rewards/frontier_coverage_20": 0.026929372176527976,
"rewards/frontier_coverage_25": 0.056025682389736174,
"rewards/frontier_coverage_5": 0.025845300406217575,
"signal/accuracy_reward/centered_abs_mean": 0.13673502951860428,
"signal/accuracy_reward/group_std_mean": 0.17701160609722139,
"signal/accuracy_reward/group_zero_std_frac": 0.5055555582046509,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06836751475930214,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06836751475930214,
"signal/advantage_abs_mean": 0.08092275410890579,
"signal/advantage_pre_scale_abs_mean": 0.08092275410890579,
"signal/advantage_pre_scale_std": 0.15143148899078368,
"signal/advantage_std": 0.15143148899078368,
"signal/brier_reward/centered_abs_mean": 0.11717015504837036,
"signal/brier_reward/group_std_mean": 0.15168921947479247,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011717015691101552,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011717015691101552,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.036166638135910034,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05395522266626358,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036166639067232607,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036166639067232607,
"signal/format_reward/centered_abs_mean": 0.0162109375,
"signal/format_reward/group_std_mean": 0.030159536749124527,
"signal/format_reward/group_zero_std_frac": 0.8777777791023255,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00810546875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00810546875,
"signal/frontier_coverage_0/centered_abs_mean": 0.11039406508207321,
"signal/frontier_coverage_0/group_std_mean": 0.15149362683296203,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00157863509375602,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00157863509375602,
"signal/frontier_coverage_1/centered_abs_mean": 0.11039406508207321,
"signal/frontier_coverage_1/group_std_mean": 0.15149362683296203,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00157863509375602,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00157863509375602,
"signal/frontier_coverage_10/centered_abs_mean": 0.11039406508207321,
"signal/frontier_coverage_10/group_std_mean": 0.15149362683296203,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00157863509375602,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00157863509375602,
"signal/frontier_coverage_15/centered_abs_mean": 0.08255304098129272,
"signal/frontier_coverage_15/group_std_mean": 0.11533389985561371,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011805085465312003,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011805085465312003,
"signal/frontier_coverage_20/centered_abs_mean": 0.05247567817568779,
"signal/frontier_coverage_20/group_std_mean": 0.0738027811050415,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007504021981731057,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007504021981731057,
"signal/frontier_coverage_25/centered_abs_mean": 0.05542582124471664,
"signal/frontier_coverage_25/group_std_mean": 0.07121084332466125,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000792589201591909,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000792589201591909,
"signal/frontier_coverage_5/centered_abs_mean": 0.11039406508207321,
"signal/frontier_coverage_5/group_std_mean": 0.15149362683296203,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00157863509375602,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00157863509375602,
"step": 160
},
{
"calibration/aurc": 0.13946615973860202,
"calibration/batch_distribution_entropy": 0.8478804166176523,
"calibration/buffer_distribution_entropy": 0.8458874423426573,
"calibration/confidence_entropy": 0.44236236460047246,
"calibration/coverage@0%": 0.07055610427831982,
"calibration/coverage@1%": 0.07055610427831982,
"calibration/coverage@10%": 0.45169935353064955,
"calibration/coverage@15%": 0.5726540277894626,
"calibration/coverage@20%": 0.7059143774537835,
"calibration/coverage@25%": 0.7753862294074275,
"calibration/coverage@30%": 0.8993705754320949,
"calibration/coverage@5%": 0.32451957543285126,
"calibration/ece": 0.09116762894764044,
"calibration/mean_confidence": 0.6891000581889718,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008333333333333349,
"completions/max_length": 3250.0,
"completions/max_terminated_length": 3250.0,
"completions/mean_length": 787.2148559570312,
"completions/mean_terminated_length": 793.8334838867188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 230.6,
"epoch": 0.39599505006187424,
"grad_norm": 0.00041544463601894677,
"learning_rate": 1.2951807228915664e-06,
"loss": -0.0046,
"num_tokens": 361556886.0,
"reward": 1.0158275485038757,
"reward_std": 0.10938665568828583,
"rewards/accuracy_reward": 0.6794270873069763,
"rewards/brier_reward": 0.8199212312698364,
"rewards/confidence_uniqueness_reward": 0.940508759021759,
"rewards/format_reward": 0.9916666626930237,
"rewards/frontier_coverage_0": 0.03606350589543581,
"rewards/frontier_coverage_1": 0.03606350589543581,
"rewards/frontier_coverage_10": 0.03607259057462216,
"rewards/frontier_coverage_15": 0.034527404233813286,
"rewards/frontier_coverage_20": 0.03649830408394337,
"rewards/frontier_coverage_25": 0.08104953020811081,
"rewards/frontier_coverage_5": 0.03606350589543581,
"signal/accuracy_reward/centered_abs_mean": 0.13355577141046523,
"signal/accuracy_reward/group_std_mean": 0.17831997573375702,
"signal/accuracy_reward/group_zero_std_frac": 0.48055556416511536,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06677788570523262,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06677788570523262,
"signal/advantage_abs_mean": 0.07951956540346146,
"signal/advantage_pre_scale_abs_mean": 0.07951956540346146,
"signal/advantage_pre_scale_std": 0.14827975630760193,
"signal/advantage_std": 0.14827975630760193,
"signal/brier_reward/centered_abs_mean": 0.11730274558067322,
"signal/brier_reward/group_std_mean": 0.15478793978691102,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011730275116860867,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011730275116860867,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03239181265234947,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04922289177775383,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003239181311801076,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003239181311801076,
"signal/format_reward/centered_abs_mean": 0.01398654505610466,
"signal/format_reward/group_std_mean": 0.026783711090683938,
"signal/format_reward/group_zero_std_frac": 0.8861111164093017,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00699327252805233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00699327252805233,
"signal/frontier_coverage_0/centered_abs_mean": 0.11483795940876007,
"signal/frontier_coverage_0/group_std_mean": 0.1570892423391342,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016421828418970107,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016421828418970107,
"signal/frontier_coverage_1/centered_abs_mean": 0.11483795940876007,
"signal/frontier_coverage_1/group_std_mean": 0.1570892423391342,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016421828418970107,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016421828418970107,
"signal/frontier_coverage_10/centered_abs_mean": 0.11440861821174622,
"signal/frontier_coverage_10/group_std_mean": 0.15652381181716918,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016360432375222445,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016360432375222445,
"signal/frontier_coverage_15/centered_abs_mean": 0.08434305042028427,
"signal/frontier_coverage_15/group_std_mean": 0.11699189096689225,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001206105574965477,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001206105574965477,
"signal/frontier_coverage_20/centered_abs_mean": 0.05325452834367752,
"signal/frontier_coverage_20/group_std_mean": 0.07273693531751632,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007615397684276104,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007615397684276104,
"signal/frontier_coverage_25/centered_abs_mean": 0.06348953396081924,
"signal/frontier_coverage_25/group_std_mean": 0.08162465393543243,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009079003590159118,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009079003590159118,
"signal/frontier_coverage_5/centered_abs_mean": 0.11483795940876007,
"signal/frontier_coverage_5/group_std_mean": 0.1570892423391342,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016421828418970107,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016421828418970107,
"step": 165
},
{
"calibration/aurc": 0.11345005897139374,
"calibration/batch_distribution_entropy": 0.706185091262855,
"calibration/buffer_distribution_entropy": 0.8465500743068246,
"calibration/confidence_entropy": 0.36535344265534453,
"calibration/coverage@0%": 0.04657404353562005,
"calibration/coverage@1%": 0.04657404353562005,
"calibration/coverage@10%": 0.5926623947732128,
"calibration/coverage@15%": 0.6906030995658305,
"calibration/coverage@20%": 0.8280044496574324,
"calibration/coverage@25%": 0.9148695822162646,
"calibration/coverage@30%": 0.9685182082216265,
"calibration/coverage@5%": 0.3362439533861038,
"calibration/ece": 0.07788768102113267,
"calibration/mean_confidence": 0.7964747611011322,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008072916666666674,
"completions/max_length": 3205.2,
"completions/max_terminated_length": 3205.2,
"completions/mean_length": 751.2134521484375,
"completions/mean_terminated_length": 757.3519409179687,
"completions/min_length": 0.0,
"completions/min_terminated_length": 216.4,
"epoch": 0.4079949000637492,
"grad_norm": 0.00043807283509522676,
"learning_rate": 1.1445783132530121e-06,
"loss": -0.0052,
"num_tokens": 373300049.0,
"reward": 1.0416455030441285,
"reward_std": 0.10653368085622787,
"rewards/accuracy_reward": 0.7301215171813965,
"rewards/brier_reward": 0.8392017483711243,
"rewards/confidence_uniqueness_reward": 0.9308984518051148,
"rewards/format_reward": 0.9919270873069763,
"rewards/frontier_coverage_0": 0.020954896369948982,
"rewards/frontier_coverage_1": 0.020954896369948982,
"rewards/frontier_coverage_10": 0.02102845092304051,
"rewards/frontier_coverage_15": 0.02324553709477186,
"rewards/frontier_coverage_20": 0.03625187166035175,
"rewards/frontier_coverage_25": 0.10913793593645096,
"rewards/frontier_coverage_5": 0.020954896369948982,
"signal/accuracy_reward/centered_abs_mean": 0.13088650107383729,
"signal/accuracy_reward/group_std_mean": 0.17503231167793273,
"signal/accuracy_reward/group_zero_std_frac": 0.4972222328186035,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06544325053691864,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06544325053691864,
"signal/advantage_abs_mean": 0.07733463197946548,
"signal/advantage_pre_scale_abs_mean": 0.07733463197946548,
"signal/advantage_pre_scale_std": 0.1462598502635956,
"signal/advantage_std": 0.1462598502635956,
"signal/brier_reward/centered_abs_mean": 0.11199976354837418,
"signal/brier_reward/group_std_mean": 0.14869881868362428,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011199977062642575,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011199977062642575,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03646374717354774,
"signal/confidence_uniqueness_reward/group_std_mean": 0.053655432909727095,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003646374773234129,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003646374773234129,
"signal/format_reward/centered_abs_mean": 0.012668185867369175,
"signal/format_reward/group_std_mean": 0.024041558802127837,
"signal/format_reward/group_zero_std_frac": 0.9000000119209289,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0063340929336845875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0063340929336845875,
"signal/frontier_coverage_0/centered_abs_mean": 0.0988058403134346,
"signal/frontier_coverage_0/group_std_mean": 0.13648533821105957,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014129235176369547,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014129235176369547,
"signal/frontier_coverage_1/centered_abs_mean": 0.0988058403134346,
"signal/frontier_coverage_1/group_std_mean": 0.13648533821105957,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014129235176369547,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014129235176369547,
"signal/frontier_coverage_10/centered_abs_mean": 0.09827619642019272,
"signal/frontier_coverage_10/group_std_mean": 0.13578663468360902,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014053495600819589,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014053495600819589,
"signal/frontier_coverage_15/centered_abs_mean": 0.07011277079582215,
"signal/frontier_coverage_15/group_std_mean": 0.09825922846794129,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010026126867160202,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010026126867160202,
"signal/frontier_coverage_20/centered_abs_mean": 0.04608847498893738,
"signal/frontier_coverage_20/group_std_mean": 0.06254973709583282,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006590651930309833,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006590651930309833,
"signal/frontier_coverage_25/centered_abs_mean": 0.0696468323469162,
"signal/frontier_coverage_25/group_std_mean": 0.08956842720508576,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009959497139789164,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009959497139789164,
"signal/frontier_coverage_5/centered_abs_mean": 0.0988058403134346,
"signal/frontier_coverage_5/group_std_mean": 0.13648533821105957,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014129235176369547,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014129235176369547,
"step": 170
},
{
"calibration/aurc": 0.10976473305508475,
"calibration/batch_distribution_entropy": 0.7718932038406594,
"calibration/buffer_distribution_entropy": 0.8409602787241589,
"calibration/confidence_entropy": 0.38561692641318424,
"calibration/coverage@0%": 0.05462301483420593,
"calibration/coverage@1%": 0.05462301483420593,
"calibration/coverage@10%": 0.5865572334220591,
"calibration/coverage@15%": 0.7722409170414061,
"calibration/coverage@20%": 0.8814547125793206,
"calibration/coverage@25%": 0.9438745800671893,
"calibration/coverage@30%": 0.9824972004479283,
"calibration/coverage@5%": 0.17130578097731236,
"calibration/ece": 0.07796374556939645,
"calibration/mean_confidence": 0.7677826396168446,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009809027777777767,
"completions/max_length": 3857.4,
"completions/max_terminated_length": 3857.4,
"completions/mean_length": 793.6175415039063,
"completions/mean_terminated_length": 801.4669189453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 211.4,
"epoch": 0.4199947500656242,
"grad_norm": 0.0005034080822952092,
"learning_rate": 9.93975903614458e-07,
"loss": -0.0094,
"num_tokens": 385550491.0,
"reward": 1.0352750539779663,
"reward_std": 0.11443682610988617,
"rewards/accuracy_reward": 0.7231770873069763,
"rewards/brier_reward": 0.8273264408111572,
"rewards/confidence_uniqueness_reward": 0.92387535572052,
"rewards/format_reward": 0.9900173544883728,
"rewards/frontier_coverage_0": 0.01753472238779068,
"rewards/frontier_coverage_1": 0.01753472238779068,
"rewards/frontier_coverage_10": 0.017634299769997596,
"rewards/frontier_coverage_15": 0.01905247466638684,
"rewards/frontier_coverage_20": 0.037164825946092606,
"rewards/frontier_coverage_25": 0.12233072817325592,
"rewards/frontier_coverage_5": 0.01753472238779068,
"signal/accuracy_reward/centered_abs_mean": 0.13874240219593048,
"signal/accuracy_reward/group_std_mean": 0.18652132749557496,
"signal/accuracy_reward/group_zero_std_frac": 0.45833333730697634,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06937120109796524,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06937120109796524,
"signal/advantage_abs_mean": 0.0822924718260765,
"signal/advantage_pre_scale_abs_mean": 0.0822924718260765,
"signal/advantage_pre_scale_std": 0.15624974370002748,
"signal/advantage_std": 0.15624974370002748,
"signal/brier_reward/centered_abs_mean": 0.11687278598546982,
"signal/brier_reward/group_std_mean": 0.1550233006477356,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011687278375029564,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011687278375029564,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04072035998106003,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05919913575053215,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004072036035358906,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004072036035358906,
"signal/format_reward/centered_abs_mean": 0.01610785573720932,
"signal/format_reward/group_std_mean": 0.02907128185033798,
"signal/format_reward/group_zero_std_frac": 0.8861111164093017,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00805392786860466,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00805392786860466,
"signal/frontier_coverage_0/centered_abs_mean": 0.09476696848869323,
"signal/frontier_coverage_0/group_std_mean": 0.13635447770357131,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013551676413044334,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013551676413044334,
"signal/frontier_coverage_1/centered_abs_mean": 0.09476696848869323,
"signal/frontier_coverage_1/group_std_mean": 0.13635447770357131,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013551676413044334,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013551676413044334,
"signal/frontier_coverage_10/centered_abs_mean": 0.0943188950419426,
"signal/frontier_coverage_10/group_std_mean": 0.13574583530426027,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013487601187080144,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013487601187080144,
"signal/frontier_coverage_15/centered_abs_mean": 0.06526126489043235,
"signal/frontier_coverage_15/group_std_mean": 0.09589355587959289,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009332361165434122,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009332361165434122,
"signal/frontier_coverage_20/centered_abs_mean": 0.04581375271081924,
"signal/frontier_coverage_20/group_std_mean": 0.06430187001824379,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006551366415806115,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006551366415806115,
"signal/frontier_coverage_25/centered_abs_mean": 0.07468874454498291,
"signal/frontier_coverage_25/group_std_mean": 0.09664878100156785,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010680490406230092,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010680490406230092,
"signal/frontier_coverage_5/centered_abs_mean": 0.09476696848869323,
"signal/frontier_coverage_5/group_std_mean": 0.13635447770357131,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013551676413044334,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013551676413044334,
"step": 175
},
{
"calibration/aurc": 0.08759878898293974,
"calibration/batch_distribution_entropy": 0.7113265211208019,
"calibration/buffer_distribution_entropy": 0.8354377217127791,
"calibration/confidence_entropy": 0.35385404928297615,
"calibration/coverage@0%": 0.04326398224140371,
"calibration/coverage@1%": 0.04326398224140371,
"calibration/coverage@10%": 0.695155511232948,
"calibration/coverage@15%": 0.8354279855474227,
"calibration/coverage@20%": 0.941970765886207,
"calibration/coverage@25%": 0.98125,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.38192302935652567,
"calibration/ece": 0.06531083905713946,
"calibration/mean_confidence": 0.8009476712326775,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013194444444444443,
"completions/max_length": 3881.0,
"completions/max_terminated_length": 3881.0,
"completions/mean_length": 757.8158935546875,
"completions/mean_terminated_length": 767.9434936523437,
"completions/min_length": 0.0,
"completions/min_terminated_length": 240.0,
"epoch": 0.4319946000674992,
"grad_norm": 0.00047206279123201966,
"learning_rate": 8.433734939759036e-07,
"loss": -0.01,
"num_tokens": 397380498.0,
"reward": 1.0324453830718994,
"reward_std": 0.12025202363729477,
"rewards/accuracy_reward": 0.7246527671813965,
"rewards/brier_reward": 0.8183708786964417,
"rewards/confidence_uniqueness_reward": 0.9126166462898254,
"rewards/format_reward": 0.98671875,
"rewards/frontier_coverage_0": 0.014990578033030033,
"rewards/frontier_coverage_1": 0.014990578033030033,
"rewards/frontier_coverage_10": 0.014904208388179541,
"rewards/frontier_coverage_15": 0.017171311657875776,
"rewards/frontier_coverage_20": 0.040305860340595245,
"rewards/frontier_coverage_25": 0.13865080773830413,
"rewards/frontier_coverage_5": 0.014990578033030033,
"signal/accuracy_reward/centered_abs_mean": 0.1432291626930237,
"signal/accuracy_reward/group_std_mean": 0.1884150594472885,
"signal/accuracy_reward/group_zero_std_frac": 0.4666666746139526,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07161458134651184,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07161458134651184,
"signal/advantage_abs_mean": 0.08963050693273544,
"signal/advantage_pre_scale_abs_mean": 0.08963050693273544,
"signal/advantage_pre_scale_std": 0.166022652387619,
"signal/advantage_std": 0.166022652387619,
"signal/brier_reward/centered_abs_mean": 0.12345067262649537,
"signal/brier_reward/group_std_mean": 0.16434098184108734,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012345067597925663,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012345067597925663,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04739027544856071,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06653770804405212,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0047390274703502655,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0047390274703502655,
"signal/format_reward/centered_abs_mean": 0.01977539025247097,
"signal/format_reward/group_std_mean": 0.03279608637094498,
"signal/format_reward/group_zero_std_frac": 0.8777777791023255,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009887695126235486,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009887695126235486,
"signal/frontier_coverage_0/centered_abs_mean": 0.08489089906215667,
"signal/frontier_coverage_0/group_std_mean": 0.12453063875436783,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012139398604631424,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012139398604631424,
"signal/frontier_coverage_1/centered_abs_mean": 0.08489089906215667,
"signal/frontier_coverage_1/group_std_mean": 0.12453063875436783,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012139398604631424,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012139398604631424,
"signal/frontier_coverage_10/centered_abs_mean": 0.08434738963842392,
"signal/frontier_coverage_10/group_std_mean": 0.123808091878891,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001206167647615075,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001206167647615075,
"signal/frontier_coverage_15/centered_abs_mean": 0.05714336410164833,
"signal/frontier_coverage_15/group_std_mean": 0.0846284121274948,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008171500754542649,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008171500754542649,
"signal/frontier_coverage_20/centered_abs_mean": 0.045037756115198134,
"signal/frontier_coverage_20/group_std_mean": 0.061644711345434186,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006440398865379393,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006440398865379393,
"signal/frontier_coverage_25/centered_abs_mean": 0.08562668412923813,
"signal/frontier_coverage_25/group_std_mean": 0.11055618077516556,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012244615936651826,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012244615936651826,
"signal/frontier_coverage_5/centered_abs_mean": 0.08489089906215667,
"signal/frontier_coverage_5/group_std_mean": 0.12453063875436783,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012139398604631424,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012139398604631424,
"step": 180
},
{
"calibration/aurc": 0.1682798620491915,
"calibration/batch_distribution_entropy": 0.7261975431193642,
"calibration/buffer_distribution_entropy": 0.8308463518220235,
"calibration/confidence_entropy": 0.376597343419283,
"calibration/coverage@0%": 0.021973124820581523,
"calibration/coverage@1%": 0.021973124820581523,
"calibration/coverage@10%": 0.12589397393134938,
"calibration/coverage@15%": 0.4402788932101213,
"calibration/coverage@20%": 0.8254405435639459,
"calibration/coverage@25%": 0.893771815008726,
"calibration/coverage@30%": 0.9361256544502619,
"calibration/coverage@5%": 0.05168102299290528,
"calibration/ece": 0.11194314941012559,
"calibration/mean_confidence": 0.7820696841453423,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012586805555555558,
"completions/max_length": 3486.0,
"completions/max_terminated_length": 3486.0,
"completions/mean_length": 758.8857666015625,
"completions/mean_terminated_length": 768.6445678710937,
"completions/min_length": 0.0,
"completions/min_terminated_length": 206.8,
"epoch": 0.44399445006937416,
"grad_norm": 0.00047838789760135114,
"learning_rate": 6.927710843373495e-07,
"loss": -0.0084,
"num_tokens": 409212910.0,
"reward": 1.0218183040618896,
"reward_std": 0.11956067681312561,
"rewards/accuracy_reward": 0.69921875,
"rewards/brier_reward": 0.8167834639549255,
"rewards/confidence_uniqueness_reward": 0.9220002293586731,
"rewards/format_reward": 0.9873263835906982,
"rewards/frontier_coverage_0": 0.027354469895362853,
"rewards/frontier_coverage_1": 0.027354469895362853,
"rewards/frontier_coverage_10": 0.027558755502104758,
"rewards/frontier_coverage_15": 0.026396383717656135,
"rewards/frontier_coverage_20": 0.04515022188425064,
"rewards/frontier_coverage_25": 0.14521757364273072,
"rewards/frontier_coverage_5": 0.027354469895362853,
"signal/accuracy_reward/centered_abs_mean": 0.1435384124517441,
"signal/accuracy_reward/group_std_mean": 0.19056273102760315,
"signal/accuracy_reward/group_zero_std_frac": 0.45833333730697634,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07176920622587205,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07176920622587205,
"signal/advantage_abs_mean": 0.08683380931615829,
"signal/advantage_pre_scale_abs_mean": 0.08683380931615829,
"signal/advantage_pre_scale_std": 0.1575675427913666,
"signal/advantage_std": 0.1575675427913666,
"signal/brier_reward/centered_abs_mean": 0.11930460929870605,
"signal/brier_reward/group_std_mean": 0.15884668827056886,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011930461041629314,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011930461041629314,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.042117471992969516,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06263997331261635,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0042117472738027574,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0042117472738027574,
"signal/format_reward/centered_abs_mean": 0.01792534738779068,
"signal/format_reward/group_std_mean": 0.033342940360307695,
"signal/format_reward/group_zero_std_frac": 0.8611111164093017,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00896267369389534,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00896267369389534,
"signal/frontier_coverage_0/centered_abs_mean": 0.0974617674946785,
"signal/frontier_coverage_0/group_std_mean": 0.13657613545656205,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013937032548710705,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013937032548710705,
"signal/frontier_coverage_1/centered_abs_mean": 0.0974617674946785,
"signal/frontier_coverage_1/group_std_mean": 0.13657613545656205,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013937032548710705,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013937032548710705,
"signal/frontier_coverage_10/centered_abs_mean": 0.09610613882541656,
"signal/frontier_coverage_10/group_std_mean": 0.1348185271024704,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013743178220465778,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013743178220465778,
"signal/frontier_coverage_15/centered_abs_mean": 0.06056812852621078,
"signal/frontier_coverage_15/group_std_mean": 0.08634127974510193,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008661242201924324,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008661242201924324,
"signal/frontier_coverage_20/centered_abs_mean": 0.04676060602068901,
"signal/frontier_coverage_20/group_std_mean": 0.06279976889491082,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006686766748316586,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006686766748316586,
"signal/frontier_coverage_25/centered_abs_mean": 0.08563613891601562,
"signal/frontier_coverage_25/group_std_mean": 0.11192211657762527,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012245968217030167,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012245968217030167,
"signal/frontier_coverage_5/centered_abs_mean": 0.0974617674946785,
"signal/frontier_coverage_5/group_std_mean": 0.13657613545656205,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013937032548710705,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013937032548710705,
"step": 185
},
{
"calibration/aurc": 0.15483879036252451,
"calibration/batch_distribution_entropy": 0.6950153465439828,
"calibration/buffer_distribution_entropy": 0.826411909010264,
"calibration/confidence_entropy": 0.3410826717900854,
"calibration/coverage@0%": 0.02739505924884515,
"calibration/coverage@1%": 0.02739505924884515,
"calibration/coverage@10%": 0.45961036352681256,
"calibration/coverage@15%": 0.5195476172337611,
"calibration/coverage@20%": 0.5751837242279644,
"calibration/coverage@25%": 0.7749469647519582,
"calibration/coverage@30%": 0.9947835073977374,
"calibration/coverage@5%": 0.240377862886191,
"calibration/ece": 0.10119530147175385,
"calibration/mean_confidence": 0.7977167211208361,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.007725694444444442,
"completions/max_length": 3442.2,
"completions/max_terminated_length": 3442.2,
"completions/mean_length": 757.0838623046875,
"completions/mean_terminated_length": 762.9970825195312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 208.2,
"epoch": 0.45599430007124914,
"grad_norm": 0.0005020072567276657,
"learning_rate": 5.421686746987952e-07,
"loss": -0.0064,
"num_tokens": 421017460.0,
"reward": 1.0469782829284668,
"reward_std": 0.1171686366200447,
"rewards/accuracy_reward": 0.7385416626930237,
"rewards/brier_reward": 0.8404343128204346,
"rewards/confidence_uniqueness_reward": 0.9256058096885681,
"rewards/format_reward": 0.9921875,
"rewards/frontier_coverage_0": 0.024775561434216796,
"rewards/frontier_coverage_1": 0.024775561434216796,
"rewards/frontier_coverage_10": 0.025043190643191337,
"rewards/frontier_coverage_15": 0.027431031875312328,
"rewards/frontier_coverage_20": 0.051411689072847364,
"rewards/frontier_coverage_25": 0.17211123406887055,
"rewards/frontier_coverage_5": 0.024775561434216796,
"signal/accuracy_reward/centered_abs_mean": 0.14800347089767457,
"signal/accuracy_reward/group_std_mean": 0.1954652965068817,
"signal/accuracy_reward/group_zero_std_frac": 0.43333333134651186,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07400173544883729,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07400173544883729,
"signal/advantage_abs_mean": 0.08494782447814941,
"signal/advantage_pre_scale_abs_mean": 0.08494782447814941,
"signal/advantage_pre_scale_std": 0.15611115396022796,
"signal/advantage_std": 0.15611115396022796,
"signal/brier_reward/centered_abs_mean": 0.1102172926068306,
"signal/brier_reward/group_std_mean": 0.15126932561397552,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011021729186177253,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011021729186177253,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.038782857730984686,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0578602485358715,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038782859221100805,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038782859221100805,
"signal/format_reward/centered_abs_mean": 0.013769531343132257,
"signal/format_reward/group_std_mean": 0.027529601380228995,
"signal/format_reward/group_zero_std_frac": 0.8805555701255798,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006884765671566128,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006884765671566128,
"signal/frontier_coverage_0/centered_abs_mean": 0.09849800616502762,
"signal/frontier_coverage_0/group_std_mean": 0.13932813704013824,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014085214817896486,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014085214817896486,
"signal/frontier_coverage_1/centered_abs_mean": 0.09849800616502762,
"signal/frontier_coverage_1/group_std_mean": 0.13932813704013824,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014085214817896486,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014085214817896486,
"signal/frontier_coverage_10/centered_abs_mean": 0.09644435942173005,
"signal/frontier_coverage_10/group_std_mean": 0.136637906730175,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013791543431580066,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013791543431580066,
"signal/frontier_coverage_15/centered_abs_mean": 0.05930143967270851,
"signal/frontier_coverage_15/group_std_mean": 0.08500065058469772,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008480105898343027,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008480105898343027,
"signal/frontier_coverage_20/centered_abs_mean": 0.047777583450078966,
"signal/frontier_coverage_20/group_std_mean": 0.06432797089219093,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006832194398157298,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006832194398157298,
"signal/frontier_coverage_25/centered_abs_mean": 0.09025410264730453,
"signal/frontier_coverage_25/group_std_mean": 0.11796820908784866,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012906335527077318,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012906335527077318,
"signal/frontier_coverage_5/centered_abs_mean": 0.09849800616502762,
"signal/frontier_coverage_5/group_std_mean": 0.13932813704013824,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014085214817896486,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014085214817896486,
"step": 190
},
{
"calibration/aurc": 0.13881246169842548,
"calibration/batch_distribution_entropy": 0.8034007790943107,
"calibration/buffer_distribution_entropy": 0.8229056014565203,
"calibration/confidence_entropy": 0.3790029894934074,
"calibration/coverage@0%": 0.04114583333333333,
"calibration/coverage@1%": 0.04114583333333333,
"calibration/coverage@10%": 0.4840796301081628,
"calibration/coverage@15%": 0.6186890797116715,
"calibration/coverage@20%": 0.7321911642296752,
"calibration/coverage@25%": 0.8936800027017224,
"calibration/coverage@30%": 0.9290811058850522,
"calibration/coverage@5%": 0.22732308576455895,
"calibration/ece": 0.11684353143808787,
"calibration/mean_confidence": 0.7342745704577593,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01015625,
"completions/max_length": 3622.6,
"completions/max_terminated_length": 3622.6,
"completions/mean_length": 783.8465454101563,
"completions/mean_terminated_length": 792.0054931640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 201.6,
"epoch": 0.46799415007312406,
"grad_norm": 0.00047749123768880963,
"learning_rate": 3.91566265060241e-07,
"loss": -0.0094,
"num_tokens": 433128236.0,
"reward": 1.0204681873321533,
"reward_std": 0.11507217586040497,
"rewards/accuracy_reward": 0.69296875,
"rewards/brier_reward": 0.8137424111366272,
"rewards/confidence_uniqueness_reward": 0.9278370261192321,
"rewards/format_reward": 0.989756953716278,
"rewards/frontier_coverage_0": 0.027737023681402205,
"rewards/frontier_coverage_1": 0.027737023681402205,
"rewards/frontier_coverage_10": 0.02771041765809059,
"rewards/frontier_coverage_15": 0.027653909847140314,
"rewards/frontier_coverage_20": 0.048602229356765746,
"rewards/frontier_coverage_25": 0.1587932139635086,
"rewards/frontier_coverage_5": 0.027737023681402205,
"signal/accuracy_reward/centered_abs_mean": 0.137255859375,
"signal/accuracy_reward/group_std_mean": 0.18516563177108764,
"signal/accuracy_reward/group_zero_std_frac": 0.4638888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0686279296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0686279296875,
"signal/advantage_abs_mean": 0.08264462798833846,
"signal/advantage_pre_scale_abs_mean": 0.08264462798833846,
"signal/advantage_pre_scale_std": 0.15539104044437407,
"signal/advantage_std": 0.15539104044437407,
"signal/brier_reward/centered_abs_mean": 0.12252413183450699,
"signal/brier_reward/group_std_mean": 0.16194479465484618,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012252412736415863,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012252412736415863,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03895300626754761,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05910627841949463,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003895300766453147,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003895300766453147,
"signal/format_reward/centered_abs_mean": 0.017187500186264516,
"signal/format_reward/group_std_mean": 0.032911072671413424,
"signal/format_reward/group_zero_std_frac": 0.8611111164093017,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008593750093132258,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008593750093132258,
"signal/frontier_coverage_0/centered_abs_mean": 0.099191452562809,
"signal/frontier_coverage_0/group_std_mean": 0.14243146479129792,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001418437750544399,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001418437750544399,
"signal/frontier_coverage_1/centered_abs_mean": 0.099191452562809,
"signal/frontier_coverage_1/group_std_mean": 0.14243146479129792,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001418437750544399,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001418437750544399,
"signal/frontier_coverage_10/centered_abs_mean": 0.09691323190927506,
"signal/frontier_coverage_10/group_std_mean": 0.13935501873493195,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001385859283618629,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001385859283618629,
"signal/frontier_coverage_15/centered_abs_mean": 0.059005143493413924,
"signal/frontier_coverage_15/group_std_mean": 0.08615712970495223,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008437735494226217,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008437735494226217,
"signal/frontier_coverage_20/centered_abs_mean": 0.049475245922803876,
"signal/frontier_coverage_20/group_std_mean": 0.06738647893071174,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007074960158206522,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007074960158206522,
"signal/frontier_coverage_25/centered_abs_mean": 0.09773661196231842,
"signal/frontier_coverage_25/group_std_mean": 0.12605148553848267,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013976335991173982,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013976335991173982,
"signal/frontier_coverage_5/centered_abs_mean": 0.099191452562809,
"signal/frontier_coverage_5/group_std_mean": 0.14243146479129792,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001418437750544399,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001418437750544399,
"step": 195
},
{
"calibration/aurc": 0.17450118814431104,
"calibration/batch_distribution_entropy": 0.7161232206831062,
"calibration/buffer_distribution_entropy": 0.8213628377832405,
"calibration/confidence_entropy": 0.3782438955136188,
"calibration/coverage@0%": 0.01263157894736842,
"calibration/coverage@1%": 0.01263157894736842,
"calibration/coverage@10%": 0.33640648618936375,
"calibration/coverage@15%": 0.3862209701800192,
"calibration/coverage@20%": 0.8251629706609866,
"calibration/coverage@25%": 0.894030134899913,
"calibration/coverage@30%": 0.9378590078328981,
"calibration/coverage@5%": 0.1907503091933489,
"calibration/ece": 0.13517372072619013,
"calibration/mean_confidence": 0.804132633586924,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006423611111111116,
"completions/max_length": 3196.8,
"completions/max_terminated_length": 3196.8,
"completions/mean_length": 757.547216796875,
"completions/mean_terminated_length": 762.4617919921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 243.2,
"epoch": 0.47999400007499904,
"grad_norm": 0.0004946914850734174,
"learning_rate": 2.409638554216868e-07,
"loss": -0.0048,
"num_tokens": 444922988.0,
"reward": 1.0290307998657227,
"reward_std": 0.11202344298362732,
"rewards/accuracy_reward": 0.7034722208976746,
"rewards/brier_reward": 0.8208521723747253,
"rewards/confidence_uniqueness_reward": 0.9324617624282837,
"rewards/format_reward": 0.9935763835906982,
"rewards/frontier_coverage_0": 0.02996818870306015,
"rewards/frontier_coverage_1": 0.02996818870306015,
"rewards/frontier_coverage_10": 0.03023492209613323,
"rewards/frontier_coverage_15": 0.02954472191631794,
"rewards/frontier_coverage_20": 0.04928178116679192,
"rewards/frontier_coverage_25": 0.1629292458295822,
"rewards/frontier_coverage_5": 0.02996818870306015,
"signal/accuracy_reward/centered_abs_mean": 0.14172092080116272,
"signal/accuracy_reward/group_std_mean": 0.18414589762687683,
"signal/accuracy_reward/group_zero_std_frac": 0.4888888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07086046040058136,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07086046040058136,
"signal/advantage_abs_mean": 0.08335170894861221,
"signal/advantage_pre_scale_abs_mean": 0.08335170894861221,
"signal/advantage_pre_scale_std": 0.15283463299274444,
"signal/advantage_std": 0.15283463299274444,
"signal/brier_reward/centered_abs_mean": 0.11691619008779526,
"signal/brier_reward/group_std_mean": 0.15511786341667175,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011691619642078876,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011691619642078876,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.032414442673325536,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04753193408250809,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032414443790912627,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032414443790912627,
"signal/format_reward/centered_abs_mean": 0.01107855886220932,
"signal/format_reward/group_std_mean": 0.021301887929439545,
"signal/format_reward/group_zero_std_frac": 0.9111111283302307,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00553927943110466,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00553927943110466,
"signal/frontier_coverage_0/centered_abs_mean": 0.09230706095695496,
"signal/frontier_coverage_0/group_std_mean": 0.1337108790874481,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013199909590184689,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013199909590184689,
"signal/frontier_coverage_1/centered_abs_mean": 0.09230706095695496,
"signal/frontier_coverage_1/group_std_mean": 0.1337108790874481,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013199909590184689,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013199909590184689,
"signal/frontier_coverage_10/centered_abs_mean": 0.08967625200748444,
"signal/frontier_coverage_10/group_std_mean": 0.1302173465490341,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001282370393164456,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001282370393164456,
"signal/frontier_coverage_15/centered_abs_mean": 0.05314598008990288,
"signal/frontier_coverage_15/group_std_mean": 0.07881596982479096,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007599874981679023,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007599874981679023,
"signal/frontier_coverage_20/centered_abs_mean": 0.04684214442968369,
"signal/frontier_coverage_20/group_std_mean": 0.06328665986657142,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006698426441289485,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006698426441289485,
"signal/frontier_coverage_25/centered_abs_mean": 0.10195220559835434,
"signal/frontier_coverage_25/group_std_mean": 0.13177755773067473,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001457916502840817,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001457916502840817,
"signal/frontier_coverage_5/centered_abs_mean": 0.09230706095695496,
"signal/frontier_coverage_5/group_std_mean": 0.1337108790874481,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013199909590184689,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013199909590184689,
"step": 200
},
{
"epoch": 0.47999400007499904,
"eval_calibration/aurc": 0.21105365972920764,
"eval_calibration/batch_distribution_entropy": 0.7556374205806667,
"eval_calibration/buffer_distribution_entropy": 0.8182456951169592,
"eval_calibration/confidence_entropy": 0.38275497673899267,
"eval_calibration/coverage@0%": 0.13020833333333334,
"eval_calibration/coverage@1%": 0.13020833333333334,
"eval_calibration/coverage@10%": 0.25,
"eval_calibration/coverage@15%": 0.4620295698924732,
"eval_calibration/coverage@20%": 0.6500336021505376,
"eval_calibration/coverage@25%": 0.8175403225806451,
"eval_calibration/coverage@30%": 0.9427083333333334,
"eval_calibration/coverage@5%": 0.13020833333333334,
"eval_calibration/ece": 0.18436342808133366,
"eval_calibration/mean_confidence": 0.7474808675974627,
"eval_completions/clipped_ratio": 0.010243055555555566,
"eval_completions/max_length": 2500.6666666666665,
"eval_completions/max_terminated_length": 2500.6666666666665,
"eval_completions/mean_length": 760.373291015625,
"eval_completions/mean_terminated_length": 768.1798095703125,
"eval_completions/min_length": 31.0,
"eval_completions/min_terminated_length": 264.1666666666667,
"eval_loss": 0.0,
"eval_num_tokens": 444922988.0,
"eval_reward": 1.0161888599395752,
"eval_reward_std": 0.25519714256127674,
"eval_rewards/accuracy_reward": 0.6892361144224802,
"eval_rewards/brier_reward": 0.8225160837173462,
"eval_rewards/confidence_uniqueness_reward": 0.8791253864765167,
"eval_rewards/format_reward": 0.9913194477558136,
"eval_rewards/frontier_coverage_0": 0.037744694078962006,
"eval_rewards/frontier_coverage_1": 0.037744694078962006,
"eval_rewards/frontier_coverage_10": 0.037526294899483524,
"eval_rewards/frontier_coverage_15": 0.03331689815968275,
"eval_rewards/frontier_coverage_20": 0.05279789244135221,
"eval_rewards/frontier_coverage_25": 0.16500501583019891,
"eval_rewards/frontier_coverage_5": 0.037744694078962006,
"eval_runtime": 191.4272,
"eval_samples_per_second": 5.224,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4150390625,
"eval_signal/accuracy_reward/group_std_mean": 0.46090074876944226,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20751953125,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20751953125,
"eval_signal/advantage_abs_mean": 0.22260082264741263,
"eval_signal/advantage_pre_scale_abs_mean": 0.22260082264741263,
"eval_signal/advantage_pre_scale_std": 0.2543907364209493,
"eval_signal/advantage_std": 0.2543907364209493,
"eval_signal/brier_reward/centered_abs_mean": 0.20484093576669693,
"eval_signal/brier_reward/group_std_mean": 0.2656843389074008,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02048409388711055,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02048409388711055,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.058576944594581924,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08974225322405498,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00585769466124475,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00585769466124475,
"eval_signal/format_reward/centered_abs_mean": 0.016710069340964157,
"eval_signal/format_reward/group_std_mean": 0.04611522828539213,
"eval_signal/format_reward/group_zero_std_frac": 0.750000019868215,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.008355034670482079,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.008355034670482079,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.14778954287370047,
"eval_signal/frontier_coverage_0/group_std_mean": 0.2527366851766904,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021133904034892717,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021133904034892717,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.14778954287370047,
"eval_signal/frontier_coverage_1/group_std_mean": 0.2527366851766904,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021133904034892717,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021133904034892717,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.1412328581015269,
"eval_signal/frontier_coverage_10/group_std_mean": 0.24346366773049036,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020196297749256096,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020196297749256096,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.07746745770176251,
"eval_signal/frontier_coverage_15/group_std_mean": 0.14579874525467554,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011077846284024417,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011077846284024417,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.06913943216204643,
"eval_signal/frontier_coverage_20/group_std_mean": 0.09641388555367787,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009886938593505572,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009886938593505572,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.20433313151200613,
"eval_signal/frontier_coverage_25/group_std_mean": 0.24381026128927866,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0029219637314478555,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0029219637314478555,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.14778954287370047,
"eval_signal/frontier_coverage_5/group_std_mean": 0.2527366851766904,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021133904034892717,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021133904034892717,
"eval_steps_per_second": 0.031,
"step": 200
},
{
"calibration/aurc": 0.13666291697376665,
"calibration/batch_distribution_entropy": 0.7343215574423284,
"calibration/buffer_distribution_entropy": 0.8161242104108076,
"calibration/confidence_entropy": 0.35527636219121406,
"calibration/coverage@0%": 0.02404686068924036,
"calibration/coverage@1%": 0.02404686068924036,
"calibration/coverage@10%": 0.38688655743384415,
"calibration/coverage@15%": 0.5568907728823345,
"calibration/coverage@20%": 0.8736886372655908,
"calibration/coverage@25%": 0.9124568852977146,
"calibration/coverage@30%": 0.967469184153028,
"calibration/coverage@5%": 0.18059922444215273,
"calibration/ece": 0.10120437717746822,
"calibration/mean_confidence": 0.774974210661998,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006684027777777768,
"completions/max_length": 3385.2,
"completions/max_terminated_length": 3385.2,
"completions/mean_length": 771.1322143554687,
"completions/mean_terminated_length": 776.322998046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 204.0,
"epoch": 0.491993850076874,
"grad_norm": 0.000427232647780329,
"learning_rate": 9.036144578313253e-08,
"loss": -0.0044,
"num_tokens": 456872383.0,
"reward": 1.061851143836975,
"reward_std": 0.10766315758228302,
"rewards/accuracy_reward": 0.7657118082046509,
"rewards/brier_reward": 0.8460538625717163,
"rewards/confidence_uniqueness_reward": 0.9307031512260437,
"rewards/format_reward": 0.9933159708976745,
"rewards/frontier_coverage_0": 0.010852430667728185,
"rewards/frontier_coverage_1": 0.010852430667728185,
"rewards/frontier_coverage_10": 0.011620633210986853,
"rewards/frontier_coverage_15": 0.022410315554589035,
"rewards/frontier_coverage_20": 0.0554275631904602,
"rewards/frontier_coverage_25": 0.20396708250045775,
"rewards/frontier_coverage_5": 0.010852430667728185,
"signal/accuracy_reward/centered_abs_mean": 0.13183051347732544,
"signal/accuracy_reward/group_std_mean": 0.1805938422679901,
"signal/accuracy_reward/group_zero_std_frac": 0.4638888895511627,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06591525673866272,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06591525673866272,
"signal/advantage_abs_mean": 0.07528244256973267,
"signal/advantage_pre_scale_abs_mean": 0.07528244256973267,
"signal/advantage_pre_scale_std": 0.14561330080032348,
"signal/advantage_std": 0.14561330080032348,
"signal/brier_reward/centered_abs_mean": 0.10941742211580277,
"signal/brier_reward/group_std_mean": 0.14824758172035218,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010941742919385433,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010941742919385433,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03412656709551811,
"signal/confidence_uniqueness_reward/group_std_mean": 0.052902082353830336,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034126567654311655,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034126567654311655,
"signal/format_reward/centered_abs_mean": 0.011886935960501433,
"signal/format_reward/group_std_mean": 0.02607992962002754,
"signal/format_reward/group_zero_std_frac": 0.8777777791023255,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0059434679802507166,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0059434679802507166,
"signal/frontier_coverage_0/centered_abs_mean": 0.10363202840089798,
"signal/frontier_coverage_0/group_std_mean": 0.144540935754776,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014819379895925522,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014819379895925522,
"signal/frontier_coverage_1/centered_abs_mean": 0.10363202840089798,
"signal/frontier_coverage_1/group_std_mean": 0.144540935754776,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014819379895925522,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014819379895925522,
"signal/frontier_coverage_10/centered_abs_mean": 0.09921518713235855,
"signal/frontier_coverage_10/group_std_mean": 0.13871577978134156,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014187771128490567,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014187771128490567,
"signal/frontier_coverage_15/centered_abs_mean": 0.06100954413414002,
"signal/frontier_coverage_15/group_std_mean": 0.08565070480108261,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008724364801310003,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008724364801310003,
"signal/frontier_coverage_20/centered_abs_mean": 0.05148982182145119,
"signal/frontier_coverage_20/group_std_mean": 0.06858698725700378,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007363044773228466,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007363044773228466,
"signal/frontier_coverage_25/centered_abs_mean": 0.09679168611764907,
"signal/frontier_coverage_25/group_std_mean": 0.12770785093307496,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013841211097314955,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013841211097314955,
"signal/frontier_coverage_5/centered_abs_mean": 0.10363202840089798,
"signal/frontier_coverage_5/group_std_mean": 0.144540935754776,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014819379895925522,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014819379895925522,
"step": 205
},
{
"calibration/aurc": 0.08977735855554447,
"calibration/batch_distribution_entropy": 0.7258205856417015,
"calibration/buffer_distribution_entropy": 0.8129820730847942,
"calibration/confidence_entropy": 0.3772828251089602,
"calibration/coverage@0%": 0.014834205933682372,
"calibration/coverage@1%": 0.014834205933682372,
"calibration/coverage@10%": 0.6369944219839089,
"calibration/coverage@15%": 0.8318157401648296,
"calibration/coverage@20%": 0.9493891797556718,
"calibration/coverage@25%": 0.9912739965095986,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.38716781238181164,
"calibration/ece": 0.0650420626430733,
"calibration/mean_confidence": 0.801219024708996,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.005208333333333333,
"completions/max_length": 3513.0,
"completions/max_terminated_length": 3513.0,
"completions/mean_length": 768.9353434244791,
"completions/mean_terminated_length": 773.00634765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 202.33333333333334,
"epoch": 0.49919376007799904,
"num_tokens": 464053312.0,
"reward": 1.0329957803090413,
"reward_std": 0.11097157249848048,
"rewards/accuracy_reward": 0.7083333333333334,
"rewards/brier_reward": 0.8263349533081055,
"rewards/confidence_uniqueness_reward": 0.9369557102521261,
"rewards/format_reward": 0.9947916666666666,
"rewards/frontier_coverage_0": 0.024060875177383423,
"rewards/frontier_coverage_1": 0.024060875177383423,
"rewards/frontier_coverage_10": 0.023912989844878513,
"rewards/frontier_coverage_15": 0.02603423222899437,
"rewards/frontier_coverage_20": 0.053175790856281914,
"rewards/frontier_coverage_25": 0.18163357178370157,
"rewards/frontier_coverage_5": 0.024060875177383423,
"signal/accuracy_reward/centered_abs_mean": 0.14668330550193787,
"signal/accuracy_reward/group_std_mean": 0.19076116383075714,
"signal/accuracy_reward/group_zero_std_frac": 0.4722222189108531,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07334165275096893,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07334165275096893,
"signal/advantage_abs_mean": 0.08218363424142201,
"signal/advantage_pre_scale_abs_mean": 0.08218363424142201,
"signal/advantage_pre_scale_std": 0.15012098848819733,
"signal/advantage_std": 0.15012098848819733,
"signal/brier_reward/centered_abs_mean": 0.11896256854136784,
"signal/brier_reward/group_std_mean": 0.15338205297787985,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011896257288753986,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011896257288753986,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030714243029554684,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04745869214336077,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030714243184775114,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030714243184775114,
"signal/format_reward/centered_abs_mean": 0.009801794153948626,
"signal/format_reward/group_std_mean": 0.022312050685286522,
"signal/format_reward/group_zero_std_frac": 0.8981481591860453,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004900897076974313,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004900897076974313,
"signal/frontier_coverage_0/centered_abs_mean": 0.11180164168278377,
"signal/frontier_coverage_0/group_std_mean": 0.15422282616297403,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001598763473642369,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001598763473642369,
"signal/frontier_coverage_1/centered_abs_mean": 0.11180164168278377,
"signal/frontier_coverage_1/group_std_mean": 0.15422282616297403,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001598763473642369,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001598763473642369,
"signal/frontier_coverage_10/centered_abs_mean": 0.10676760226488113,
"signal/frontier_coverage_10/group_std_mean": 0.14766866465409598,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015267768564323585,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015267768564323585,
"signal/frontier_coverage_15/centered_abs_mean": 0.06299562752246857,
"signal/frontier_coverage_15/group_std_mean": 0.08778965721527736,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009008374957678219,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009008374957678219,
"signal/frontier_coverage_20/centered_abs_mean": 0.05205661058425903,
"signal/frontier_coverage_20/group_std_mean": 0.06810636073350906,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007444095293370386,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007444095293370386,
"signal/frontier_coverage_25/centered_abs_mean": 0.10553650557994843,
"signal/frontier_coverage_25/group_std_mean": 0.13382530957460403,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015091719493890803,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015091719493890803,
"signal/frontier_coverage_5/centered_abs_mean": 0.11180164168278377,
"signal/frontier_coverage_5/group_std_mean": 0.15422282616297403,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001598763473642369,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001598763473642369,
"step": 208,
"total_flos": 0.0,
"train_loss": -0.006107796076461314,
"train_runtime": 40819.9823,
"train_samples_per_second": 0.367,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 208,
"num_input_tokens_seen": 464053312,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}