Files
RLCR-v4-ks-uniqueness-cov0-…/trainer_state.json
ModelHub XC d9f0210ad3 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-uniqueness-cov0-entropy100-noece-noaurc-scaletrue-batchcov-hotpot
Source: Original Platform
2026-05-09 19:34:41 +08:00

8566 lines
519 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 50,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.6373261091843637,
"calibration/batch_distribution_entropy": 0.6465861666510452,
"calibration/confidence_entropy": 0.34232269490001105,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5009005491505466,
"calibration/mean_confidence": 0.7910126313284384,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03662109375,
"completions/max_length": 1507.6,
"completions/max_terminated_length": 1507.6,
"completions/mean_length": 215.51953125,
"completions/mean_terminated_length": 223.70487365722656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.016,
"grad_norm": 0.1652095913887024,
"learning_rate": 3.1249999999999997e-07,
"loss": 0.0242,
"num_tokens": 17050952.0,
"reward": 0.4797627925872803,
"reward_std": 0.35704264640808103,
"rewards/accuracy_reward": 0.21953125,
"rewards/batch_coverage_0": 0.0552757054567337,
"rewards/batch_coverage_1": 0.0552757054567337,
"rewards/batch_coverage_10": 0.07531758025288582,
"rewards/batch_coverage_15": 0.08600667119026184,
"rewards/batch_coverage_20": 0.09857667386531829,
"rewards/batch_coverage_25": 0.10926464796066285,
"rewards/batch_coverage_5": 0.06029992550611496,
"rewards/brier_reward": 0.37625510096549986,
"rewards/confidence_uniqueness_reward": 0.4875007390975952,
"rewards/format_reward": 0.681640625,
"rewards/frontier_entropy_batch_reward": -0.6492096781730652,
"signal/accuracy_reward/centered_abs_mean": 0.23970947265625,
"signal/accuracy_reward/group_std_mean": 0.2809469699859619,
"signal/accuracy_reward/group_zero_std_frac": 0.328125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.35993377566337587,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.119854736328125,
"signal/advantage_abs_mean": 0.8450886011123657,
"signal/advantage_pre_scale_abs_mean": 0.30423404574394225,
"signal/advantage_pre_scale_std": 0.36350384950637815,
"signal/advantage_std": 0.9841925024986267,
"signal/batch_coverage_0/centered_abs_mean": 0.07735746204853058,
"signal/batch_coverage_0/group_std_mean": 0.12862617671489715,
"signal/batch_coverage_0/group_zero_std_frac": 0.003125,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.003360638115555048,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0011062117060646414,
"signal/batch_coverage_1/centered_abs_mean": 0.07735746204853058,
"signal/batch_coverage_1/group_std_mean": 0.12862617671489715,
"signal/batch_coverage_1/group_zero_std_frac": 0.003125,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.003360638115555048,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0011062117060646414,
"signal/batch_coverage_10/centered_abs_mean": 0.09151717722415924,
"signal/batch_coverage_10/group_std_mean": 0.1435583233833313,
"signal/batch_coverage_10/group_zero_std_frac": 0.003125,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.003964567929506302,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0013086956227198244,
"signal/batch_coverage_15/centered_abs_mean": 0.10193184614181519,
"signal/batch_coverage_15/group_std_mean": 0.15447192192077636,
"signal/batch_coverage_15/group_zero_std_frac": 0.003125,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.004429516848176718,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.001457625417970121,
"signal/batch_coverage_20/centered_abs_mean": 0.11815007328987122,
"signal/batch_coverage_20/group_std_mean": 0.17157170474529265,
"signal/batch_coverage_20/group_zero_std_frac": 0.003125,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.005136752594262361,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0016895460663363337,
"signal/batch_coverage_25/centered_abs_mean": 0.1344620779156685,
"signal/batch_coverage_25/group_std_mean": 0.18939262628555298,
"signal/batch_coverage_25/group_zero_std_frac": 0.003125,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.005850685015320778,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.001922807819209993,
"signal/batch_coverage_5/centered_abs_mean": 0.08038161844015121,
"signal/batch_coverage_5/group_std_mean": 0.1318250775337219,
"signal/batch_coverage_5/group_zero_std_frac": 0.003125,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0034911792725324632,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0011494571343064308,
"signal/brier_reward/centered_abs_mean": 0.3200297772884369,
"signal/brier_reward/group_std_mean": 0.36500824689865113,
"signal/brier_reward/group_zero_std_frac": 0.003125,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09681591242551804,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.032002977281808856,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.3005684792995453,
"signal/confidence_uniqueness_reward/group_std_mean": 0.35096290707588196,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.09130170047283173,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.030056847631931304,
"signal/format_reward/centered_abs_mean": 0.40726318359375,
"signal/format_reward/group_std_mean": 0.4561456859111786,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.61845782995224,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.203631591796875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.43004666566848754,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4745893657207489,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.13057637959718704,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04300466775894165,
"step": 5
},
{
"calibration/aurc": 0.6795537579618556,
"calibration/batch_distribution_entropy": 0.6636404394336322,
"calibration/confidence_entropy": 0.34291309742836623,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5240589456018558,
"calibration/mean_confidence": 0.7873437694455336,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.030859375,
"completions/max_length": 1486.2,
"completions/max_terminated_length": 1486.2,
"completions/mean_length": 201.25380859375,
"completions/mean_terminated_length": 207.68313598632812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.032,
"grad_norm": 0.1985611766576767,
"learning_rate": 6.249999999999999e-07,
"loss": 0.0198,
"num_tokens": 34212143.0,
"reward": 0.514380669593811,
"reward_std": 0.33193816542625426,
"rewards/accuracy_reward": 0.21796875,
"rewards/batch_coverage_0": 0.054630983620882034,
"rewards/batch_coverage_1": 0.054630983620882034,
"rewards/batch_coverage_10": 0.09490734785795212,
"rewards/batch_coverage_15": 0.10108360797166824,
"rewards/batch_coverage_20": 0.11422456949949264,
"rewards/batch_coverage_25": 0.12260636389255523,
"rewards/batch_coverage_5": 0.07091258615255355,
"rewards/brier_reward": 0.39744025468826294,
"rewards/confidence_uniqueness_reward": 0.5402819037437439,
"rewards/format_reward": 0.748046875,
"rewards/frontier_entropy_batch_reward": -0.711652135848999,
"signal/accuracy_reward/centered_abs_mean": 0.223095703125,
"signal/accuracy_reward/group_std_mean": 0.2711519658565521,
"signal/accuracy_reward/group_zero_std_frac": 0.309375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.3662183403968811,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1115478515625,
"signal/advantage_abs_mean": 0.7963252782821655,
"signal/advantage_pre_scale_abs_mean": 0.2699368894100189,
"signal/advantage_pre_scale_std": 0.3400941789150238,
"signal/advantage_std": 0.9841671347618103,
"signal/batch_coverage_0/centered_abs_mean": 0.0823998749256134,
"signal/batch_coverage_0/group_std_mean": 0.13471100330352784,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0038847104646265508,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.001178318215534091,
"signal/batch_coverage_1/centered_abs_mean": 0.0823998749256134,
"signal/batch_coverage_1/group_std_mean": 0.13471100330352784,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0038847104646265508,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.001178318215534091,
"signal/batch_coverage_10/centered_abs_mean": 0.10847108513116836,
"signal/batch_coverage_10/group_std_mean": 0.16454391479492186,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.005101960431784391,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0015511365141719579,
"signal/batch_coverage_15/centered_abs_mean": 0.11424930393695831,
"signal/batch_coverage_15/group_std_mean": 0.17042292654514313,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.005372866988182068,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0016337650828063487,
"signal/batch_coverage_20/centered_abs_mean": 0.12856489270925522,
"signal/batch_coverage_20/group_std_mean": 0.18573523461818695,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.0060584286227822306,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0018384779570624233,
"signal/batch_coverage_25/centered_abs_mean": 0.1404997855424881,
"signal/batch_coverage_25/group_std_mean": 0.19834802746772767,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.0066345173865556715,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020091469399631024,
"signal/batch_coverage_5/centered_abs_mean": 0.09054070562124253,
"signal/batch_coverage_5/group_std_mean": 0.14401516318321228,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.004269297886639834,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0012947321170940994,
"signal/brier_reward/centered_abs_mean": 0.3056317925453186,
"signal/brier_reward/group_std_mean": 0.3545816421508789,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.10050017833709717,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.03056318052113056,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2679386377334595,
"signal/confidence_uniqueness_reward/group_std_mean": 0.3307202637195587,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08798813968896865,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.026793863996863364,
"signal/format_reward/centered_abs_mean": 0.35068359375,
"signal/format_reward/group_std_mean": 0.4214269757270813,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.5750837802886963,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.175341796875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3855856955051422,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4489862143993378,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.12660761475563048,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038558568060398105,
"step": 10
},
{
"calibration/aurc": 0.6169754589257901,
"calibration/batch_distribution_entropy": 0.6509359190029964,
"calibration/buffer_distribution_entropy": 0.6632450401167147,
"calibration/confidence_entropy": 0.3447272173329581,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.47127741331913714,
"calibration/mean_confidence": 0.7999372677967667,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0150390625,
"completions/max_length": 1304.0,
"completions/max_terminated_length": 1304.0,
"completions/mean_length": 165.9875,
"completions/mean_terminated_length": 168.5960906982422,
"completions/min_length": 0.0,
"completions/min_terminated_length": 29.2,
"epoch": 0.048,
"grad_norm": 0.02442619577050209,
"learning_rate": 9.374999999999999e-07,
"loss": 0.0036,
"num_tokens": 50960591.0,
"reward": 0.6489390015602112,
"reward_std": 0.2351370334625244,
"rewards/accuracy_reward": 0.2857421875,
"rewards/batch_coverage_0": 0.08331729024648667,
"rewards/batch_coverage_1": 0.08331729024648667,
"rewards/batch_coverage_10": 0.14008204489946366,
"rewards/batch_coverage_15": 0.15269193649291993,
"rewards/batch_coverage_20": 0.1704973042011261,
"rewards/batch_coverage_25": 0.17928307056427,
"rewards/batch_coverage_5": 0.09736606627702712,
"rewards/brier_reward": 0.5068644285202026,
"rewards/confidence_uniqueness_reward": 0.6822745203971863,
"rewards/format_reward": 0.92216796875,
"rewards/frontier_entropy_batch_reward": -0.868937087059021,
"signal/accuracy_reward/centered_abs_mean": 0.19013671875,
"signal/accuracy_reward/group_std_mean": 0.239943265914917,
"signal/accuracy_reward/group_zero_std_frac": 0.3625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.5162946462631226,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.095068359375,
"signal/advantage_abs_mean": 0.68557368516922,
"signal/advantage_pre_scale_abs_mean": 0.17026761323213577,
"signal/advantage_pre_scale_std": 0.24687082171440125,
"signal/advantage_std": 0.9839404463768006,
"signal/batch_coverage_0/centered_abs_mean": 0.10061688423156738,
"signal/batch_coverage_0/group_std_mean": 0.15735834538936616,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.008269770722836255,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.001438821479678154,
"signal/batch_coverage_1/centered_abs_mean": 0.10061688423156738,
"signal/batch_coverage_1/group_std_mean": 0.15735834538936616,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.008269770722836255,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.001438821479678154,
"signal/batch_coverage_10/centered_abs_mean": 0.12309518903493881,
"signal/batch_coverage_10/group_std_mean": 0.1861822485923767,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.010132433753460646,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0017602612962946296,
"signal/batch_coverage_15/centered_abs_mean": 0.1313455581665039,
"signal/batch_coverage_15/group_std_mean": 0.19549228847026826,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.010681292042136192,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.001878241472877562,
"signal/batch_coverage_20/centered_abs_mean": 0.14566806256771087,
"signal/batch_coverage_20/group_std_mean": 0.21204695403575896,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.011906150355935096,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020830533001571895,
"signal/batch_coverage_25/centered_abs_mean": 0.15666291415691375,
"signal/batch_coverage_25/group_std_mean": 0.22407272458076477,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012635924853384495,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0022402796894311905,
"signal/batch_coverage_5/centered_abs_mean": 0.10522163063287734,
"signal/batch_coverage_5/group_std_mean": 0.16216584146022797,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.008624614495784045,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0015046692918986083,
"signal/brier_reward/centered_abs_mean": 0.25743305683135986,
"signal/brier_reward/group_std_mean": 0.3131587505340576,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.14089297950267793,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.025743305310606958,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.17150525450706483,
"signal/confidence_uniqueness_reward/group_std_mean": 0.22964869439601898,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.09172600209712982,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.017150526121258735,
"signal/format_reward/centered_abs_mean": 0.134808349609375,
"signal/format_reward/group_std_mean": 0.23255448937416076,
"signal/format_reward/group_zero_std_frac": 0.153125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.3284162819385529,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0674041748046875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21744501888751983,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3348918974399567,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.034375,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.11601799428462982,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02174450196325779,
"step": 15
},
{
"calibration/aurc": 0.5160265874145994,
"calibration/batch_distribution_entropy": 0.7821093596540675,
"calibration/buffer_distribution_entropy": 0.6721649986596376,
"calibration/confidence_entropy": 0.419975819470785,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.3147944777253985,
"calibration/mean_confidence": 0.7203686667722206,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00400390625,
"completions/max_length": 1025.6,
"completions/max_terminated_length": 1025.6,
"completions/mean_length": 128.2537109375,
"completions/mean_terminated_length": 128.78225402832032,
"completions/min_length": 0.0,
"completions/min_terminated_length": 30.6,
"epoch": 0.064,
"grad_norm": 0.031037550419569016,
"learning_rate": 1e-06,
"loss": -0.0098,
"num_tokens": 67192309.0,
"reward": 0.7435286998748779,
"reward_std": 0.18170610964298248,
"rewards/accuracy_reward": 0.3544921875,
"rewards/batch_coverage_0": 0.10424036830663681,
"rewards/batch_coverage_1": 0.10424036830663681,
"rewards/batch_coverage_10": 0.17098541855812072,
"rewards/batch_coverage_15": 0.18716970086097717,
"rewards/batch_coverage_20": 0.20965131223201752,
"rewards/batch_coverage_25": 0.21948152482509614,
"rewards/batch_coverage_5": 0.1322124183177948,
"rewards/brier_reward": 0.6193184971809387,
"rewards/confidence_uniqueness_reward": 0.8077521324157715,
"rewards/format_reward": 0.987109375,
"rewards/frontier_entropy_batch_reward": -0.8610928893089295,
"signal/accuracy_reward/centered_abs_mean": 0.1988037109375,
"signal/accuracy_reward/group_std_mean": 0.2517113327980042,
"signal/accuracy_reward/group_zero_std_frac": 0.325,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7254745364189148,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09940185546875,
"signal/advantage_abs_mean": 0.7277890801429748,
"signal/advantage_pre_scale_abs_mean": 0.1374871164560318,
"signal/advantage_pre_scale_std": 0.1954087108373642,
"signal/advantage_std": 0.983764922618866,
"signal/batch_coverage_0/centered_abs_mean": 0.13318662196397782,
"signal/batch_coverage_0/group_std_mean": 0.19517841041088105,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.014266725815832614,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.001904568658210337,
"signal/batch_coverage_1/centered_abs_mean": 0.13318662196397782,
"signal/batch_coverage_1/group_std_mean": 0.19517841041088105,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.014266725815832614,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.001904568658210337,
"signal/batch_coverage_10/centered_abs_mean": 0.15405770838260652,
"signal/batch_coverage_10/group_std_mean": 0.22008646428585052,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01639944761991501,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0022030251566320658,
"signal/batch_coverage_15/centered_abs_mean": 0.16269078552722932,
"signal/batch_coverage_15/group_std_mean": 0.23098040223121644,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01727539598941803,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0023264782037585975,
"signal/batch_coverage_20/centered_abs_mean": 0.17800569534301758,
"signal/batch_coverage_20/group_std_mean": 0.24921154975891113,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01889161504805088,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0025454814080148936,
"signal/batch_coverage_25/centered_abs_mean": 0.1886595755815506,
"signal/batch_coverage_25/group_std_mean": 0.2600310921669006,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.019977013394236565,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0026978319510817528,
"signal/batch_coverage_5/centered_abs_mean": 0.13942071199417114,
"signal/batch_coverage_5/group_std_mean": 0.20169951319694518,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01490629930049181,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0019937161123380065,
"signal/brier_reward/centered_abs_mean": 0.23416422307491302,
"signal/brier_reward/group_std_mean": 0.28761149048805235,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1729327619075775,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.023416423052549363,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08846724629402161,
"signal/confidence_uniqueness_reward/group_std_mean": 0.12165791392326356,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06507326290011406,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008846724499017,
"signal/format_reward/centered_abs_mean": 0.02449951171875,
"signal/format_reward/group_std_mean": 0.06146693043410778,
"signal/format_reward/group_zero_std_frac": 0.690625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08888003826141358,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012249755859375,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22997065484523774,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36477545499801634,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0375,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.1725946694612503,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022997065633535384,
"step": 20
},
{
"calibration/aurc": 0.659273068888402,
"calibration/batch_distribution_entropy": 0.9708581625874132,
"calibration/buffer_distribution_entropy": 0.7680514501525051,
"calibration/confidence_entropy": 0.49606242445239357,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.24383066141563323,
"calibration/mean_confidence": 0.4667354515596018,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0033203125,
"completions/max_length": 825.0,
"completions/max_terminated_length": 825.0,
"completions/mean_length": 106.2681640625,
"completions/mean_terminated_length": 106.61890869140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 35.8,
"epoch": 0.08,
"grad_norm": 0.013238995335996151,
"learning_rate": 1e-06,
"loss": -0.0245,
"num_tokens": 83213647.0,
"reward": 0.8145912766456604,
"reward_std": 0.15870223343372344,
"rewards/accuracy_reward": 0.34990234375,
"rewards/batch_coverage_0": 0.19447994828224183,
"rewards/batch_coverage_1": 0.19447994828224183,
"rewards/batch_coverage_10": 0.23603789806365966,
"rewards/batch_coverage_15": 0.24778175055980683,
"rewards/batch_coverage_20": 0.2619646489620209,
"rewards/batch_coverage_25": 0.27012325525283815,
"rewards/batch_coverage_5": 0.21396056711673736,
"rewards/brier_reward": 0.7053467035293579,
"rewards/confidence_uniqueness_reward": 0.9336926817893982,
"rewards/format_reward": 0.99267578125,
"rewards/frontier_entropy_batch_reward": -0.4375097811222076,
"signal/accuracy_reward/centered_abs_mean": 0.193804931640625,
"signal/accuracy_reward/group_std_mean": 0.2428890973329544,
"signal/accuracy_reward/group_zero_std_frac": 0.359375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8063225746154785,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0969024658203125,
"signal/advantage_abs_mean": 0.7467060685157776,
"signal/advantage_pre_scale_abs_mean": 0.12001456767320633,
"signal/advantage_pre_scale_std": 0.17081058919429778,
"signal/advantage_std": 0.9836754441261292,
"signal/batch_coverage_0/centered_abs_mean": 0.2497153639793396,
"signal/batch_coverage_0/group_std_mean": 0.31533910632133483,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.02987198568880558,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0035709296353161335,
"signal/batch_coverage_1/centered_abs_mean": 0.2497153639793396,
"signal/batch_coverage_1/group_std_mean": 0.31533910632133483,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.02987198568880558,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0035709296353161335,
"signal/batch_coverage_10/centered_abs_mean": 0.25728048086166383,
"signal/batch_coverage_10/group_std_mean": 0.3238477647304535,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03072645589709282,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0036791109945625068,
"signal/batch_coverage_15/centered_abs_mean": 0.2555805444717407,
"signal/batch_coverage_15/group_std_mean": 0.32144402861595156,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03051029294729233,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.003654801845550537,
"signal/batch_coverage_20/centered_abs_mean": 0.25660484433174136,
"signal/batch_coverage_20/group_std_mean": 0.323328423500061,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.030591926723718642,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0036694493144750597,
"signal/batch_coverage_25/centered_abs_mean": 0.2620661616325378,
"signal/batch_coverage_25/group_std_mean": 0.3306765556335449,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03118036426603794,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0037475463002920152,
"signal/batch_coverage_5/centered_abs_mean": 0.25281934440135956,
"signal/batch_coverage_5/group_std_mean": 0.3182687520980835,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.030213556066155434,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0036153166554868223,
"signal/brier_reward/centered_abs_mean": 0.2312027245759964,
"signal/brier_reward/group_std_mean": 0.28400464057922364,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19109801054000855,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.023120272532105447,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03871437720954418,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0667199194431305,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03128206320106983,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038714376278221607,
"signal/format_reward/centered_abs_mean": 0.014093017578125,
"signal/format_reward/group_std_mean": 0.039295760542154314,
"signal/format_reward/group_zero_std_frac": 0.784375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05787626802921295,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0070465087890625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4507632553577423,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5134151577949524,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3705613732337952,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04507632553577423,
"step": 25
},
{
"calibration/aurc": 0.6369551230672109,
"calibration/batch_distribution_entropy": 0.8055908332520693,
"calibration/buffer_distribution_entropy": 0.8725075291862959,
"calibration/confidence_entropy": 0.440458014707745,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.003937007874015748,
"calibration/coverage@25%": 0.006299212598425197,
"calibration/coverage@30%": 0.01062992125984252,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.2013707173339057,
"calibration/mean_confidence": 0.24628456410768784,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00654296875,
"completions/max_length": 1000.2,
"completions/max_terminated_length": 1000.2,
"completions/mean_length": 112.25400390625,
"completions/mean_terminated_length": 112.99878387451172,
"completions/min_length": 0.0,
"completions/min_terminated_length": 41.6,
"epoch": 0.096,
"grad_norm": 0.022800516337156296,
"learning_rate": 1e-06,
"loss": -0.0329,
"num_tokens": 99407736.0,
"reward": 0.8120527863502502,
"reward_std": 0.13627898395061494,
"rewards/accuracy_reward": 0.36025390625,
"rewards/batch_coverage_0": 0.2765601396560669,
"rewards/batch_coverage_1": 0.2765601396560669,
"rewards/batch_coverage_10": 0.30208975076675415,
"rewards/batch_coverage_15": 0.31157302260398867,
"rewards/batch_coverage_20": 0.3181931436061859,
"rewards/batch_coverage_25": 0.3236126244068146,
"rewards/batch_coverage_5": 0.2889863818883896,
"rewards/brier_reward": 0.7170829892158508,
"rewards/confidence_uniqueness_reward": 0.9133394241333008,
"rewards/format_reward": 0.9904296875,
"rewards/frontier_entropy_batch_reward": -0.5632658362388611,
"signal/accuracy_reward/centered_abs_mean": 0.183563232421875,
"signal/accuracy_reward/group_std_mean": 0.23314289450645448,
"signal/accuracy_reward/group_zero_std_frac": 0.36875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.179761004447937,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0917816162109375,
"signal/advantage_abs_mean": 0.703443419933319,
"signal/advantage_pre_scale_abs_mean": 0.09668720364570618,
"signal/advantage_pre_scale_std": 0.15405036211013795,
"signal/advantage_std": 0.9832155346870423,
"signal/batch_coverage_0/centered_abs_mean": 0.25192484855651853,
"signal/batch_coverage_0/group_std_mean": 0.31211569905281067,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04653439298272133,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.003602525359019637,
"signal/batch_coverage_1/centered_abs_mean": 0.25192484855651853,
"signal/batch_coverage_1/group_std_mean": 0.31211569905281067,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04653439298272133,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.003602525359019637,
"signal/batch_coverage_10/centered_abs_mean": 0.25177322924137113,
"signal/batch_coverage_10/group_std_mean": 0.3137675106525421,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04646962657570839,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0036003570538014175,
"signal/batch_coverage_15/centered_abs_mean": 0.24674721360206603,
"signal/batch_coverage_15/group_std_mean": 0.3090873181819916,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.045537931472063066,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.003528485121205449,
"signal/batch_coverage_20/centered_abs_mean": 0.23448554873466493,
"signal/batch_coverage_20/group_std_mean": 0.29566892981529236,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04342522844672203,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.003353143390268087,
"signal/batch_coverage_25/centered_abs_mean": 0.22657085955142975,
"signal/batch_coverage_25/group_std_mean": 0.2883758008480072,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.042099975794553754,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.003239963296800852,
"signal/batch_coverage_5/centered_abs_mean": 0.2547337025403976,
"signal/batch_coverage_5/group_std_mean": 0.3161489307880402,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04701984152197838,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0036426919978111982,
"signal/brier_reward/centered_abs_mean": 0.1984732985496521,
"signal/brier_reward/group_std_mean": 0.25304334461688993,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.25615803599357606,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.019847330078482626,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04229490533471107,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07432217746973038,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05456542745232582,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004229490412399173,
"signal/format_reward/centered_abs_mean": 0.01829833984375,
"signal/format_reward/group_std_mean": 0.048546963930130006,
"signal/format_reward/group_zero_std_frac": 0.74375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.11600722074508667,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009149169921875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4489035427570343,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5113010764122009,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.581957995891571,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04489035606384277,
"step": 30
},
{
"calibration/aurc": 0.5173940626537294,
"calibration/batch_distribution_entropy": 0.9432925466581696,
"calibration/buffer_distribution_entropy": 0.9257200120173643,
"calibration/confidence_entropy": 0.5337548494679153,
"calibration/coverage@0%": 0.0023622230677838433,
"calibration/coverage@1%": 0.0023622230677838433,
"calibration/coverage@10%": 0.0023622230677838433,
"calibration/coverage@15%": 0.0035410049931276547,
"calibration/coverage@20%": 0.0035410049931276547,
"calibration/coverage@25%": 0.003933932301575592,
"calibration/coverage@30%": 0.003933932301575592,
"calibration/coverage@5%": 0.0023622230677838433,
"calibration/ece": 0.19002162983403126,
"calibration/mean_confidence": 0.4036477289378933,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0080078125,
"completions/max_length": 1129.8,
"completions/max_terminated_length": 1129.8,
"completions/mean_length": 126.34736328125,
"completions/mean_terminated_length": 127.37257385253906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 50.6,
"epoch": 0.112,
"grad_norm": 0.011110126040875912,
"learning_rate": 1e-06,
"loss": -0.0313,
"num_tokens": 115811005.0,
"reward": 0.8575616359710694,
"reward_std": 0.1483265370130539,
"rewards/accuracy_reward": 0.412109375,
"rewards/batch_coverage_0": 0.18250919580459596,
"rewards/batch_coverage_1": 0.18250919580459596,
"rewards/batch_coverage_10": 0.219953316450119,
"rewards/batch_coverage_15": 0.23371829092502594,
"rewards/batch_coverage_20": 0.2468995362520218,
"rewards/batch_coverage_25": 0.2535784751176834,
"rewards/batch_coverage_5": 0.20052466094493865,
"rewards/brier_reward": 0.7087387323379517,
"rewards/confidence_uniqueness_reward": 0.9385404348373413,
"rewards/format_reward": 0.99140625,
"rewards/frontier_entropy_batch_reward": -0.30655706524848936,
"signal/accuracy_reward/centered_abs_mean": 0.18665771484375,
"signal/accuracy_reward/group_std_mean": 0.2335004061460495,
"signal/accuracy_reward/group_zero_std_frac": 0.384375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8936283946037292,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.093328857421875,
"signal/advantage_abs_mean": 0.7414809942245484,
"signal/advantage_pre_scale_abs_mean": 0.11034760773181915,
"signal/advantage_pre_scale_std": 0.16365787088871003,
"signal/advantage_std": 0.9835437655448913,
"signal/batch_coverage_0/centered_abs_mean": 0.2369515985250473,
"signal/batch_coverage_0/group_std_mean": 0.2966028988361359,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.032617274671792984,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0033884078729897738,
"signal/batch_coverage_1/centered_abs_mean": 0.2369515985250473,
"signal/batch_coverage_1/group_std_mean": 0.2966028988361359,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.032617274671792984,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0033884078729897738,
"signal/batch_coverage_10/centered_abs_mean": 0.2426645427942276,
"signal/batch_coverage_10/group_std_mean": 0.3042888045310974,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03337412625551224,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.00347010288387537,
"signal/batch_coverage_15/centered_abs_mean": 0.23903344869613646,
"signal/batch_coverage_15/group_std_mean": 0.3005025327205658,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.032858715206384656,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.003418178344145417,
"signal/batch_coverage_20/centered_abs_mean": 0.2379598081111908,
"signal/batch_coverage_20/group_std_mean": 0.30019118785858157,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.0327223714441061,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.003402825305238366,
"signal/batch_coverage_25/centered_abs_mean": 0.23780983984470366,
"signal/batch_coverage_25/group_std_mean": 0.30095354914665223,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03264134675264359,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0034006804693490268,
"signal/batch_coverage_5/centered_abs_mean": 0.2414990097284317,
"signal/batch_coverage_5/group_std_mean": 0.3026431679725647,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03324813023209572,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0034534358885139225,
"signal/brier_reward/centered_abs_mean": 0.20246321558952332,
"signal/brier_reward/group_std_mean": 0.25215981602668763,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1943250447511673,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.020246322453022002,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02902383990585804,
"signal/confidence_uniqueness_reward/group_std_mean": 0.058507513254880905,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027896419540047644,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029023839626461266,
"signal/format_reward/centered_abs_mean": 0.01649169921875,
"signal/format_reward/group_std_mean": 0.044241581857204434,
"signal/format_reward/group_zero_std_frac": 0.765625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07819900512695313,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008245849609375,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36329306960105895,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4340018093585968,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.352882045507431,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03632930666208267,
"step": 35
},
{
"calibration/aurc": 0.5268840798603046,
"calibration/batch_distribution_entropy": 0.9881459018375685,
"calibration/buffer_distribution_entropy": 0.9507221731850362,
"calibration/confidence_entropy": 0.517089658035264,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.00275049115913556,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.22157705645715264,
"calibration/mean_confidence": 0.45853932202215536,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01123046875,
"completions/max_length": 1148.6,
"completions/max_terminated_length": 1148.6,
"completions/mean_length": 139.51337890625,
"completions/mean_terminated_length": 141.1068328857422,
"completions/min_length": 0.0,
"completions/min_terminated_length": 55.2,
"epoch": 0.128,
"grad_norm": 0.007969611324369907,
"learning_rate": 1e-06,
"loss": -0.0557,
"num_tokens": 132156294.0,
"reward": 0.8748344421386719,
"reward_std": 0.147980397939682,
"rewards/accuracy_reward": 0.4267578125,
"rewards/batch_coverage_0": 0.20069107115268708,
"rewards/batch_coverage_1": 0.20069107115268708,
"rewards/batch_coverage_10": 0.2380422294139862,
"rewards/batch_coverage_15": 0.24701308310031891,
"rewards/batch_coverage_20": 0.25846782326698303,
"rewards/batch_coverage_25": 0.2643324613571167,
"rewards/batch_coverage_5": 0.2187619239091873,
"rewards/brier_reward": 0.6851011157035828,
"rewards/confidence_uniqueness_reward": 0.9430533051490784,
"rewards/format_reward": 0.98837890625,
"rewards/frontier_entropy_batch_reward": -0.18829761445522308,
"signal/accuracy_reward/centered_abs_mean": 0.14984130859375,
"signal/accuracy_reward/group_std_mean": 0.19972037076950072,
"signal/accuracy_reward/group_zero_std_frac": 0.41875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6852742671966553,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.074920654296875,
"signal/advantage_abs_mean": 0.7205666780471802,
"signal/advantage_pre_scale_abs_mean": 0.10542576014995575,
"signal/advantage_pre_scale_std": 0.1632304906845093,
"signal/advantage_std": 0.9835788607597351,
"signal/batch_coverage_0/centered_abs_mean": 0.24876993000507355,
"signal/batch_coverage_0/group_std_mean": 0.3085006058216095,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03299525789916515,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0035574099980294704,
"signal/batch_coverage_1/centered_abs_mean": 0.24876993000507355,
"signal/batch_coverage_1/group_std_mean": 0.3085006058216095,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03299525789916515,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0035574099980294704,
"signal/batch_coverage_10/centered_abs_mean": 0.2526919633150101,
"signal/batch_coverage_10/group_std_mean": 0.31363179683685305,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.033503925427794456,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0036134951282292605,
"signal/batch_coverage_15/centered_abs_mean": 0.2509923607110977,
"signal/batch_coverage_15/group_std_mean": 0.311715692281723,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.033296512067317964,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0035891907289624216,
"signal/batch_coverage_20/centered_abs_mean": 0.2525142878293991,
"signal/batch_coverage_20/group_std_mean": 0.31447394490242003,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03351157084107399,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.003610954247415066,
"signal/batch_coverage_25/centered_abs_mean": 0.25000489354133604,
"signal/batch_coverage_25/group_std_mean": 0.3117690205574036,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03318729922175408,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.003575070109218359,
"signal/batch_coverage_5/centered_abs_mean": 0.2549441397190094,
"signal/batch_coverage_5/group_std_mean": 0.31646093130111697,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03380131050944328,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.003645701287314296,
"signal/brier_reward/centered_abs_mean": 0.23300331234931945,
"signal/brier_reward/group_std_mean": 0.2820957779884338,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21624882519245148,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.023300331830978394,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030025603249669075,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06504980400204659,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028021814301609993,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003002560418099165,
"signal/format_reward/centered_abs_mean": 0.022149658203125,
"signal/format_reward/group_std_mean": 0.056480865180492404,
"signal/format_reward/group_zero_std_frac": 0.7125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10390360951423645,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0110748291015625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2851736843585968,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36849350333213804,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.2644934684038162,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02851736731827259,
"step": 40
},
{
"calibration/aurc": 0.3908897594631147,
"calibration/batch_distribution_entropy": 0.9793994890043638,
"calibration/buffer_distribution_entropy": 0.9639280501865457,
"calibration/confidence_entropy": 0.4823526568785848,
"calibration/coverage@0%": 0.0011881562319148685,
"calibration/coverage@1%": 0.0011881562319148685,
"calibration/coverage@10%": 0.0011881562319148685,
"calibration/coverage@15%": 0.0039604334596376415,
"calibration/coverage@20%": 0.10656705453700951,
"calibration/coverage@25%": 0.21268301524550642,
"calibration/coverage@30%": 0.2288754823822389,
"calibration/coverage@5%": 0.0011881562319148685,
"calibration/ece": 0.26584464815654074,
"calibration/mean_confidence": 0.4273948038821092,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0115234375,
"completions/max_length": 1287.2,
"completions/max_terminated_length": 1287.2,
"completions/mean_length": 156.25498046875,
"completions/mean_terminated_length": 158.07511291503906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 58.6,
"epoch": 0.144,
"grad_norm": 0.008597729727625847,
"learning_rate": 1e-06,
"loss": -0.0436,
"num_tokens": 148706777.0,
"reward": 0.9181379199028015,
"reward_std": 0.1555813044309616,
"rewards/accuracy_reward": 0.52119140625,
"rewards/batch_coverage_0": 0.19273079335689544,
"rewards/batch_coverage_1": 0.19273079335689544,
"rewards/batch_coverage_10": 0.2425982028245926,
"rewards/batch_coverage_15": 0.2533731073141098,
"rewards/batch_coverage_20": 0.26104374825954435,
"rewards/batch_coverage_25": 0.2662007987499237,
"rewards/batch_coverage_5": 0.21613748073577882,
"rewards/brier_reward": 0.6717984676361084,
"rewards/confidence_uniqueness_reward": 0.9413758754730225,
"rewards/format_reward": 0.98818359375,
"rewards/frontier_entropy_batch_reward": -0.2110186845064163,
"signal/accuracy_reward/centered_abs_mean": 0.163812255859375,
"signal/accuracy_reward/group_std_mean": 0.21589326560497285,
"signal/accuracy_reward/group_zero_std_frac": 0.38125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7454147100448608,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0819061279296875,
"signal/advantage_abs_mean": 0.7236922383308411,
"signal/advantage_pre_scale_abs_mean": 0.11080611050128937,
"signal/advantage_pre_scale_std": 0.17058975994586945,
"signal/advantage_std": 0.9835979700088501,
"signal/batch_coverage_0/centered_abs_mean": 0.266434383392334,
"signal/batch_coverage_0/group_std_mean": 0.32679077982902527,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03458985388278961,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.003810011688619852,
"signal/batch_coverage_1/centered_abs_mean": 0.266434383392334,
"signal/batch_coverage_1/group_std_mean": 0.32679077982902527,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03458985388278961,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.003810011688619852,
"signal/batch_coverage_10/centered_abs_mean": 0.2715885639190674,
"signal/batch_coverage_10/group_std_mean": 0.33486836552619936,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03524618148803711,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.00388371660374105,
"signal/batch_coverage_15/centered_abs_mean": 0.26918852925300596,
"signal/batch_coverage_15/group_std_mean": 0.33258755803108214,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.034940270334482194,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0038493959233164786,
"signal/batch_coverage_20/centered_abs_mean": 0.26530343294143677,
"signal/batch_coverage_20/group_std_mean": 0.3288030087947845,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.034423737227916716,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0037938390392810105,
"signal/batch_coverage_25/centered_abs_mean": 0.2609012186527252,
"signal/batch_coverage_25/group_std_mean": 0.3240866124629974,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03384031280875206,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.003730887267738581,
"signal/batch_coverage_5/centered_abs_mean": 0.2733203172683716,
"signal/batch_coverage_5/group_std_mean": 0.3354912757873535,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03547571823000908,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.003908480517566204,
"signal/brier_reward/centered_abs_mean": 0.24704381227493286,
"signal/brier_reward/group_std_mean": 0.29715535044670105,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.22423461079597473,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.024704382568597794,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03163357488811016,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06734048649668693,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02875906378030777,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031633577309548855,
"signal/format_reward/centered_abs_mean": 0.022491455078125,
"signal/format_reward/group_std_mean": 0.057434971630573275,
"signal/format_reward/group_zero_std_frac": 0.70625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10224665850400924,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0112457275390625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3063051402568817,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38502358794212344,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.2783151209354401,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0306305143982172,
"step": 45
},
{
"calibration/aurc": 0.46575769813070034,
"calibration/batch_distribution_entropy": 0.9935435442933741,
"calibration/buffer_distribution_entropy": 0.9718765395402269,
"calibration/confidence_entropy": 0.49034085912160263,
"calibration/coverage@0%": 0.0015873701060211075,
"calibration/coverage@1%": 0.0015873701060211075,
"calibration/coverage@10%": 0.0015873701060211075,
"calibration/coverage@15%": 0.0015873701060211075,
"calibration/coverage@20%": 0.003944933956708731,
"calibration/coverage@25%": 0.004730788573604605,
"calibration/coverage@30%": 0.014161043976355096,
"calibration/coverage@5%": 0.0015873701060211075,
"calibration/ece": 0.18153789884384935,
"calibration/mean_confidence": 0.4796684327608073,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01484375,
"completions/max_length": 1009.0,
"completions/max_terminated_length": 1009.0,
"completions/mean_length": 163.23037109375,
"completions/mean_terminated_length": 165.69429931640624,
"completions/min_length": 0.0,
"completions/min_terminated_length": 61.8,
"epoch": 0.16,
"grad_norm": 0.007046389393508434,
"learning_rate": 1e-06,
"loss": -0.0617,
"num_tokens": 165399184.0,
"reward": 0.8970174312591552,
"reward_std": 0.16062215864658355,
"rewards/accuracy_reward": 0.45966796875,
"rewards/batch_coverage_0": 0.224856960773468,
"rewards/batch_coverage_1": 0.224856960773468,
"rewards/batch_coverage_10": 0.26703203916549684,
"rewards/batch_coverage_15": 0.2741366446018219,
"rewards/batch_coverage_20": 0.2853119790554047,
"rewards/batch_coverage_25": 0.2901387333869934,
"rewards/batch_coverage_5": 0.24517568647861482,
"rewards/brier_reward": 0.6930433869361877,
"rewards/confidence_uniqueness_reward": 0.9418343544006348,
"rewards/format_reward": 0.9849609375,
"rewards/frontier_entropy_batch_reward": -0.14689382612705232,
"signal/accuracy_reward/centered_abs_mean": 0.155767822265625,
"signal/accuracy_reward/group_std_mean": 0.20625517666339874,
"signal/accuracy_reward/group_zero_std_frac": 0.403125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6726519107818604,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0778839111328125,
"signal/advantage_abs_mean": 0.7129915833473206,
"signal/advantage_pre_scale_abs_mean": 0.11246829628944396,
"signal/advantage_pre_scale_std": 0.1778249204158783,
"signal/advantage_std": 0.9836406350135803,
"signal/batch_coverage_0/centered_abs_mean": 0.261820513010025,
"signal/batch_coverage_0/group_std_mean": 0.325615668296814,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.032354673743247984,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.003744033258408308,
"signal/batch_coverage_1/centered_abs_mean": 0.261820513010025,
"signal/batch_coverage_1/group_std_mean": 0.325615668296814,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.032354673743247984,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.003744033258408308,
"signal/batch_coverage_10/centered_abs_mean": 0.2718116581439972,
"signal/batch_coverage_10/group_std_mean": 0.3369123458862305,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03358059972524643,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0038869067560881377,
"signal/batch_coverage_15/centered_abs_mean": 0.2694369077682495,
"signal/batch_coverage_15/group_std_mean": 0.33361018300056455,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03328223079442978,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.003852947847917676,
"signal/batch_coverage_20/centered_abs_mean": 0.2705358564853668,
"signal/batch_coverage_20/group_std_mean": 0.33500627279281614,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03342964798212052,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0038686628453433515,
"signal/batch_coverage_25/centered_abs_mean": 0.27277548909187316,
"signal/batch_coverage_25/group_std_mean": 0.33765124082565307,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03371136784553528,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.003900689631700516,
"signal/batch_coverage_5/centered_abs_mean": 0.26610376834869387,
"signal/batch_coverage_5/group_std_mean": 0.3305723547935486,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03288530968129635,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0038052838295698165,
"signal/brier_reward/centered_abs_mean": 0.24320079386234283,
"signal/brier_reward/group_std_mean": 0.29356223344802856,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21018587350845336,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.024320079386234282,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.033786237612366675,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07571721524000168,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02918182797729969,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033786237705498933,
"signal/format_reward/centered_abs_mean": 0.02840576171875,
"signal/format_reward/group_std_mean": 0.07038533240556717,
"signal/format_reward/group_zero_std_frac": 0.646875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.12264087647199631,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.014202880859375,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23739419281482696,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3186127722263336,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.20512811839580536,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023739420250058173,
"step": 50
},
{
"epoch": 0.16,
"eval_calibration/aurc": 0.562246010539858,
"eval_calibration/batch_distribution_entropy": 0.9193620611440112,
"eval_calibration/buffer_distribution_entropy": 0.9754673264817139,
"eval_calibration/confidence_entropy": 0.48591568590613476,
"eval_calibration/coverage@0%": 0.023941532258064516,
"eval_calibration/coverage@1%": 0.023941532258064516,
"eval_calibration/coverage@10%": 0.023941532258064516,
"eval_calibration/coverage@15%": 0.023941532258064516,
"eval_calibration/coverage@20%": 0.04813508064516129,
"eval_calibration/coverage@25%": 0.10282258064516128,
"eval_calibration/coverage@30%": 0.11844758064516128,
"eval_calibration/coverage@5%": 0.023941532258064516,
"eval_calibration/ece": 0.28053149868195854,
"eval_calibration/mean_confidence": 0.4684673564087854,
"eval_completions/clipped_ratio": 0.01037176724137931,
"eval_completions/max_length": 618.0,
"eval_completions/max_terminated_length": 618.0,
"eval_completions/mean_length": 173.83795928955078,
"eval_completions/mean_terminated_length": 175.71324920654297,
"eval_completions/min_length": 17.75,
"eval_completions/min_terminated_length": 74.5,
"eval_loss": 0.0,
"eval_num_tokens": 165399184.0,
"eval_reward": 0.7727401107549667,
"eval_reward_std": 0.23869208991527557,
"eval_rewards/accuracy_reward": 0.404296875,
"eval_rewards/batch_coverage_0": 0.12788349762558937,
"eval_rewards/batch_coverage_1": 0.12788349762558937,
"eval_rewards/batch_coverage_10": 0.1259235292673111,
"eval_rewards/batch_coverage_15": 0.1259235292673111,
"eval_rewards/batch_coverage_20": 0.11765317618846893,
"eval_rewards/batch_coverage_25": 0.10355318710207939,
"eval_rewards/batch_coverage_5": 0.12788349762558937,
"eval_rewards/brier_reward": 0.7332238703966141,
"eval_rewards/confidence_uniqueness_reward": 0.8892467767000198,
"eval_rewards/format_reward": 0.990234375,
"eval_rewards/frontier_entropy_batch_reward": -0.990234375,
"eval_runtime": 46.2682,
"eval_samples_per_second": 10.807,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4644775390625,
"eval_signal/accuracy_reward/group_std_mean": 0.48902085423469543,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9803617894649506,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23223876953125,
"eval_signal/advantage_abs_mean": 0.8980159163475037,
"eval_signal/advantage_pre_scale_abs_mean": 0.2143118791282177,
"eval_signal/advantage_pre_scale_std": 0.23657548055052757,
"eval_signal/advantage_std": 0.9877015054225922,
"eval_signal/batch_coverage_0/centered_abs_mean": 0.2970190942287445,
"eval_signal/batch_coverage_0/group_std_mean": 0.38584908097982407,
"eval_signal/batch_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017935237381607294,
"eval_signal/batch_coverage_0/weight": 0.014299999922513962,
"eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.004247372969985008,
"eval_signal/batch_coverage_1/centered_abs_mean": 0.2970190942287445,
"eval_signal/batch_coverage_1/group_std_mean": 0.38584908097982407,
"eval_signal/batch_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017935237381607294,
"eval_signal/batch_coverage_1/weight": 0.014299999922513962,
"eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.004247372969985008,
"eval_signal/batch_coverage_10/centered_abs_mean": 0.2857876867055893,
"eval_signal/batch_coverage_10/group_std_mean": 0.3715630993247032,
"eval_signal/batch_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01728172041475773,
"eval_signal/batch_coverage_10/weight": 0.014299999922513962,
"eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.004086763830855489,
"eval_signal/batch_coverage_15/centered_abs_mean": 0.2857876867055893,
"eval_signal/batch_coverage_15/group_std_mean": 0.3715630993247032,
"eval_signal/batch_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01728172041475773,
"eval_signal/batch_coverage_15/weight": 0.014299999922513962,
"eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.004086763830855489,
"eval_signal/batch_coverage_20/centered_abs_mean": 0.2643692269921303,
"eval_signal/batch_coverage_20/group_std_mean": 0.3458894342184067,
"eval_signal/batch_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.016003886004909873,
"eval_signal/batch_coverage_20/weight": 0.014299999922513962,
"eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.0037804798339493573,
"eval_signal/batch_coverage_25/centered_abs_mean": 0.22446337342262268,
"eval_signal/batch_coverage_25/group_std_mean": 0.29489558935165405,
"eval_signal/batch_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013599664904177189,
"eval_signal/batch_coverage_25/weight": 0.014299999922513962,
"eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.0032098261872306466,
"eval_signal/batch_coverage_5/centered_abs_mean": 0.2970190942287445,
"eval_signal/batch_coverage_5/group_std_mean": 0.38584908097982407,
"eval_signal/batch_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.017935237381607294,
"eval_signal/batch_coverage_5/weight": 0.014299999922513962,
"eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.004247372969985008,
"eval_signal/brier_reward/centered_abs_mean": 0.23577644675970078,
"eval_signal/brier_reward/group_std_mean": 0.28787870705127716,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09959037974476814,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02357764495536685,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05138667766004801,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08252080902457237,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021524199284613132,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005138667649589479,
"eval_signal/format_reward/centered_abs_mean": 0.0189208984375,
"eval_signal/format_reward/group_std_mean": 0.055242715403437614,
"eval_signal/format_reward/group_zero_std_frac": 0.6875,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.038965243846178055,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00946044921875,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0189208984375,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.055242715403437614,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6875,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.007793049095198512,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.001892089785542339,
"eval_steps_per_second": 0.086,
"step": 50
},
{
"calibration/aurc": 0.4058639576024353,
"calibration/batch_distribution_entropy": 0.9914696006819504,
"calibration/buffer_distribution_entropy": 0.9771811661947689,
"calibration/confidence_entropy": 0.4794375728490169,
"calibration/coverage@0%": 0.000392156862745098,
"calibration/coverage@1%": 0.000392156862745098,
"calibration/coverage@10%": 0.000392156862745098,
"calibration/coverage@15%": 0.000392156862745098,
"calibration/coverage@20%": 0.004321429947224469,
"calibration/coverage@25%": 0.032219268847028006,
"calibration/coverage@30%": 0.17706864497969316,
"calibration/coverage@5%": 0.000392156862745098,
"calibration/ece": 0.2033278974465818,
"calibration/mean_confidence": 0.5026187365209009,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00908203125,
"completions/max_length": 1191.4,
"completions/max_terminated_length": 1191.4,
"completions/mean_length": 178.840625,
"completions/mean_terminated_length": 180.475341796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 65.2,
"epoch": 0.176,
"grad_norm": 0.009468083269894123,
"learning_rate": 1e-06,
"loss": -0.0364,
"num_tokens": 182467632.0,
"reward": 0.9074610710144043,
"reward_std": 0.14233968853950502,
"rewards/accuracy_reward": 0.4705078125,
"rewards/batch_coverage_0": 0.2374819278717041,
"rewards/batch_coverage_1": 0.2374819278717041,
"rewards/batch_coverage_10": 0.28763837218284605,
"rewards/batch_coverage_15": 0.30320699214935304,
"rewards/batch_coverage_20": 0.3154535412788391,
"rewards/batch_coverage_25": 0.3183199048042297,
"rewards/batch_coverage_5": 0.2596494257450104,
"rewards/brier_reward": 0.7119304656982421,
"rewards/confidence_uniqueness_reward": 0.9463279843330383,
"rewards/format_reward": 0.99091796875,
"rewards/frontier_entropy_batch_reward": -0.17094670236110687,
"signal/accuracy_reward/centered_abs_mean": 0.14677734375,
"signal/accuracy_reward/group_std_mean": 0.1891311824321747,
"signal/accuracy_reward/group_zero_std_frac": 0.48125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7422645807266235,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.073388671875,
"signal/advantage_abs_mean": 0.7341462850570679,
"signal/advantage_pre_scale_abs_mean": 0.10340920239686965,
"signal/advantage_pre_scale_std": 0.15987979173660277,
"signal/advantage_std": 0.9834848642349243,
"signal/batch_coverage_0/centered_abs_mean": 0.2536461532115936,
"signal/batch_coverage_0/group_std_mean": 0.3149066686630249,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0370890274643898,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0036271399818360805,
"signal/batch_coverage_1/centered_abs_mean": 0.2536461532115936,
"signal/batch_coverage_1/group_std_mean": 0.3149066686630249,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0370890274643898,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0036271399818360805,
"signal/batch_coverage_10/centered_abs_mean": 0.2639505207538605,
"signal/batch_coverage_10/group_std_mean": 0.3261951506137848,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.038583753257989885,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0037744925823062657,
"signal/batch_coverage_15/centered_abs_mean": 0.26278347074985503,
"signal/batch_coverage_15/group_std_mean": 0.32544536590576173,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.038414908945560454,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0037578035145998,
"signal/batch_coverage_20/centered_abs_mean": 0.26741994023323057,
"signal/batch_coverage_20/group_std_mean": 0.3316928446292877,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03910925537347794,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.003824105253443122,
"signal/batch_coverage_25/centered_abs_mean": 0.2639124572277069,
"signal/batch_coverage_25/group_std_mean": 0.3276098668575287,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03858695030212402,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.003773948224261403,
"signal/batch_coverage_5/centered_abs_mean": 0.25576930344104765,
"signal/batch_coverage_5/group_std_mean": 0.31654787063598633,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03741679862141609,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0036575009115040304,
"signal/brier_reward/centered_abs_mean": 0.22843996584415435,
"signal/brier_reward/group_std_mean": 0.2793159544467926,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.23320569694042206,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.022843996807932854,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025819224119186402,
"signal/confidence_uniqueness_reward/group_std_mean": 0.053821495920419696,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026197914406657218,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025819224305450917,
"signal/format_reward/centered_abs_mean": 0.017303466796875,
"signal/format_reward/group_std_mean": 0.04413560926914215,
"signal/format_reward/group_zero_std_frac": 0.775,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08711997866630554,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0086517333984375,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26356192827224734,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3436621904373169,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.2690876364707947,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02635619342327118,
"step": 55
},
{
"calibration/aurc": 0.3629724345315162,
"calibration/batch_distribution_entropy": 0.9816236022150591,
"calibration/buffer_distribution_entropy": 0.9804879354242804,
"calibration/confidence_entropy": 0.4597148913096222,
"calibration/coverage@0%": 0.0015670956059039057,
"calibration/coverage@1%": 0.0015670956059039057,
"calibration/coverage@10%": 0.004306821633301166,
"calibration/coverage@15%": 0.009394884255610362,
"calibration/coverage@20%": 0.02705354499632832,
"calibration/coverage@25%": 0.07451777976847278,
"calibration/coverage@30%": 0.2995797753932156,
"calibration/coverage@5%": 0.0015670956059039057,
"calibration/ece": 0.15940265474416004,
"calibration/mean_confidence": 0.47236021548759366,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.005078125,
"completions/max_length": 1090.8,
"completions/max_terminated_length": 1090.8,
"completions/mean_length": 192.31787109375,
"completions/mean_terminated_length": 193.29725036621093,
"completions/min_length": 0.0,
"completions/min_terminated_length": 63.2,
"epoch": 0.192,
"grad_norm": 0.006933207623660564,
"learning_rate": 1e-06,
"loss": -0.0216,
"num_tokens": 199251783.0,
"reward": 0.9257945537567138,
"reward_std": 0.13200957477092742,
"rewards/accuracy_reward": 0.496875,
"rewards/batch_coverage_0": 0.27818471789360044,
"rewards/batch_coverage_1": 0.27818471789360044,
"rewards/batch_coverage_10": 0.3267790138721466,
"rewards/batch_coverage_15": 0.3349075675010681,
"rewards/batch_coverage_20": 0.3430617153644562,
"rewards/batch_coverage_25": 0.34973667860031127,
"rewards/batch_coverage_5": 0.3075689971446991,
"rewards/brier_reward": 0.734505581855774,
"rewards/confidence_uniqueness_reward": 0.9484726190567017,
"rewards/format_reward": 0.99482421875,
"rewards/frontier_entropy_batch_reward": -0.20076338946819305,
"signal/accuracy_reward/centered_abs_mean": 0.14207763671875,
"signal/accuracy_reward/group_std_mean": 0.18903130292892456,
"signal/accuracy_reward/group_zero_std_frac": 0.45,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7877019166946411,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.071038818359375,
"signal/advantage_abs_mean": 0.7333375334739685,
"signal/advantage_pre_scale_abs_mean": 0.09644296765327454,
"signal/advantage_pre_scale_std": 0.14937117993831633,
"signal/advantage_std": 0.9833891034126282,
"signal/batch_coverage_0/centered_abs_mean": 0.2371144860982895,
"signal/batch_coverage_0/group_std_mean": 0.29555144906044006,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03813448995351791,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.003390737110748887,
"signal/batch_coverage_1/centered_abs_mean": 0.2371144860982895,
"signal/batch_coverage_1/group_std_mean": 0.29555144906044006,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03813448995351791,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.003390737110748887,
"signal/batch_coverage_10/centered_abs_mean": 0.2512255012989044,
"signal/batch_coverage_10/group_std_mean": 0.3117607593536377,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.040200534462928775,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0035925245843827724,
"signal/batch_coverage_15/centered_abs_mean": 0.25101412534713746,
"signal/batch_coverage_15/group_std_mean": 0.31150234341621397,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04012797474861145,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.003589502023532987,
"signal/batch_coverage_20/centered_abs_mean": 0.25290718078613283,
"signal/batch_coverage_20/group_std_mean": 0.31392589807510374,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.040450763702392575,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0036165726836770774,
"signal/batch_coverage_25/centered_abs_mean": 0.2513018786907196,
"signal/batch_coverage_25/group_std_mean": 0.3126278817653656,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04021292626857757,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.003593616746366024,
"signal/batch_coverage_5/centered_abs_mean": 0.2468347042798996,
"signal/batch_coverage_5/group_std_mean": 0.30652726292610166,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03960250541567802,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.00352973616681993,
"signal/brier_reward/centered_abs_mean": 0.21205961108207702,
"signal/brier_reward/group_std_mean": 0.26358293294906615,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.237126162648201,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.021205961331725122,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021224848553538324,
"signal/confidence_uniqueness_reward/group_std_mean": 0.041045262664556506,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0238847978413105,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002122484892606735,
"signal/format_reward/centered_abs_mean": 0.009942626953125,
"signal/format_reward/group_std_mean": 0.027782656624913214,
"signal/format_reward/group_zero_std_frac": 0.846875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05646195188164711,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0049713134765625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2922514736652374,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3685956597328186,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.32798022627830503,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029225147888064386,
"step": 60
},
{
"calibration/aurc": 0.2955570498534926,
"calibration/batch_distribution_entropy": 0.9753288303488776,
"calibration/buffer_distribution_entropy": 0.9818330363107535,
"calibration/confidence_entropy": 0.4696564086727891,
"calibration/coverage@0%": 0.004703580062161851,
"calibration/coverage@1%": 0.004703580062161851,
"calibration/coverage@10%": 0.10038985457196577,
"calibration/coverage@15%": 0.22431142319941683,
"calibration/coverage@20%": 0.38893749280534134,
"calibration/coverage@25%": 0.5214635015671635,
"calibration/coverage@30%": 0.5915759301002205,
"calibration/coverage@5%": 0.004703580062161851,
"calibration/ece": 0.15610340603473422,
"calibration/mean_confidence": 0.5436399052179712,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00458984375,
"completions/max_length": 885.2,
"completions/max_terminated_length": 885.2,
"completions/mean_length": 210.11533203125,
"completions/mean_terminated_length": 211.0801208496094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 77.4,
"epoch": 0.208,
"grad_norm": 0.007579752244055271,
"learning_rate": 1e-06,
"loss": -0.0188,
"num_tokens": 216435588.0,
"reward": 0.9540463805198669,
"reward_std": 0.1229624554514885,
"rewards/accuracy_reward": 0.54208984375,
"rewards/batch_coverage_0": 0.2904252469539642,
"rewards/batch_coverage_1": 0.2904252469539642,
"rewards/batch_coverage_10": 0.3460793435573578,
"rewards/batch_coverage_15": 0.35459370613098146,
"rewards/batch_coverage_20": 0.369259774684906,
"rewards/batch_coverage_25": 0.3725292026996613,
"rewards/batch_coverage_5": 0.3211548626422882,
"rewards/brier_reward": 0.7557557940483093,
"rewards/confidence_uniqueness_reward": 0.9494149565696717,
"rewards/format_reward": 0.99521484375,
"rewards/frontier_entropy_batch_reward": -0.18648923933506012,
"signal/accuracy_reward/centered_abs_mean": 0.127740478515625,
"signal/accuracy_reward/group_std_mean": 0.1696159452199936,
"signal/accuracy_reward/group_zero_std_frac": 0.515625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8017047643661499,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0638702392578125,
"signal/advantage_abs_mean": 0.7370152473449707,
"signal/advantage_pre_scale_abs_mean": 0.08942593038082122,
"signal/advantage_pre_scale_std": 0.1426718145608902,
"signal/advantage_std": 0.9832539916038513,
"signal/batch_coverage_0/centered_abs_mean": 0.21396125555038453,
"signal/batch_coverage_0/group_std_mean": 0.2671938180923462,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03842084556818008,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.003059645928442478,
"signal/batch_coverage_1/centered_abs_mean": 0.21396125555038453,
"signal/batch_coverage_1/group_std_mean": 0.2671938180923462,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03842084556818008,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.003059645928442478,
"signal/batch_coverage_10/centered_abs_mean": 0.22922752797603607,
"signal/batch_coverage_10/group_std_mean": 0.28651703596115113,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04119753390550614,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.003277953574433923,
"signal/batch_coverage_15/centered_abs_mean": 0.23158512711524964,
"signal/batch_coverage_15/group_std_mean": 0.28928992748260496,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04162941426038742,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0033116671722382305,
"signal/batch_coverage_20/centered_abs_mean": 0.23296157717704774,
"signal/batch_coverage_20/group_std_mean": 0.29139900803565977,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04184585437178612,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0033313506282866,
"signal/batch_coverage_25/centered_abs_mean": 0.23438824713230133,
"signal/batch_coverage_25/group_std_mean": 0.2934773325920105,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04214929640293121,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0033517518546432256,
"signal/batch_coverage_5/centered_abs_mean": 0.2227215588092804,
"signal/batch_coverage_5/group_std_mean": 0.27792264223098756,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04000507667660713,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0031849182676523926,
"signal/brier_reward/centered_abs_mean": 0.18360722959041595,
"signal/brier_reward/group_std_mean": 0.23183298110961914,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2296048790216446,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.018360722810029984,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019582030177116395,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03763454332947731,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02437414266169071,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001958203059621155,
"signal/format_reward/centered_abs_mean": 0.009185791015625,
"signal/format_reward/group_std_mean": 0.025268962234258653,
"signal/format_reward/group_zero_std_frac": 0.8625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05644859969615936,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0045928955078125,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27716624140739443,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35632293224334716,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.34786927700042725,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027716624736785888,
"step": 65
},
{
"calibration/aurc": 0.2965983333644823,
"calibration/batch_distribution_entropy": 0.9823246202723134,
"calibration/buffer_distribution_entropy": 0.9834847325184812,
"calibration/confidence_entropy": 0.4621176100001009,
"calibration/coverage@0%": 0.008615939526495529,
"calibration/coverage@1%": 0.008615939526495529,
"calibration/coverage@10%": 0.1707962088945167,
"calibration/coverage@15%": 0.2769310463911592,
"calibration/coverage@20%": 0.37681669928245265,
"calibration/coverage@25%": 0.48340585549288206,
"calibration/coverage@30%": 0.601358351559802,
"calibration/coverage@5%": 0.05362572426230765,
"calibration/ece": 0.15821446691039592,
"calibration/mean_confidence": 0.5084448069151312,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0029296875,
"completions/max_length": 999.0,
"completions/max_terminated_length": 999.0,
"completions/mean_length": 219.26708984375,
"completions/mean_terminated_length": 219.91590270996093,
"completions/min_length": 0.0,
"completions/min_terminated_length": 85.6,
"epoch": 0.224,
"grad_norm": 0.006702653598040342,
"learning_rate": 1e-06,
"loss": -0.0129,
"num_tokens": 233834067.0,
"reward": 0.9355417966842652,
"reward_std": 0.11460374146699906,
"rewards/accuracy_reward": 0.49541015625,
"rewards/batch_coverage_0": 0.3303173840045929,
"rewards/batch_coverage_1": 0.3303173840045929,
"rewards/batch_coverage_10": 0.38841472268104554,
"rewards/batch_coverage_15": 0.39242235422134397,
"rewards/batch_coverage_20": 0.3987464547157288,
"rewards/batch_coverage_25": 0.4024514138698578,
"rewards/batch_coverage_5": 0.35994952321052553,
"rewards/brier_reward": 0.7808137893676758,
"rewards/confidence_uniqueness_reward": 0.9499208688735962,
"rewards/format_reward": 0.9970703125,
"rewards/frontier_entropy_batch_reward": -0.20989351868629455,
"signal/accuracy_reward/centered_abs_mean": 0.116143798828125,
"signal/accuracy_reward/group_std_mean": 0.15492962598800658,
"signal/accuracy_reward/group_zero_std_frac": 0.546875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7869158387184143,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0580718994140625,
"signal/advantage_abs_mean": 0.7476185441017151,
"signal/advantage_pre_scale_abs_mean": 0.08492442816495896,
"signal/advantage_pre_scale_std": 0.13532426059246064,
"signal/advantage_std": 0.9831453442573548,
"signal/batch_coverage_0/centered_abs_mean": 0.18892920017242432,
"signal/batch_coverage_0/group_std_mean": 0.23911657631397248,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0369180828332901,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.002701687626540661,
"signal/batch_coverage_1/centered_abs_mean": 0.18892920017242432,
"signal/batch_coverage_1/group_std_mean": 0.23911657631397248,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0369180828332901,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.002701687626540661,
"signal/batch_coverage_10/centered_abs_mean": 0.2077297806739807,
"signal/batch_coverage_10/group_std_mean": 0.262596932053566,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04057376310229301,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0029705358669161797,
"signal/batch_coverage_15/centered_abs_mean": 0.20889460444450378,
"signal/batch_coverage_15/group_std_mean": 0.2641852557659149,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.040815822780132294,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0029871928971260788,
"signal/batch_coverage_20/centered_abs_mean": 0.2125068187713623,
"signal/batch_coverage_20/group_std_mean": 0.269322806596756,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.041476115584373474,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0030388474464416505,
"signal/batch_coverage_25/centered_abs_mean": 0.21426658630371093,
"signal/batch_coverage_25/group_std_mean": 0.27135098576545713,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04181862398982048,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.003064012061804533,
"signal/batch_coverage_5/centered_abs_mean": 0.19771002233028412,
"signal/batch_coverage_5/group_std_mean": 0.24965128302574158,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03862960487604141,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0028272532392293213,
"signal/brier_reward/centered_abs_mean": 0.1645033210515976,
"signal/brier_reward/group_std_mean": 0.21097516715526582,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.22423305213451386,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016450332850217818,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01771753653883934,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030995216965675355,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024135235324501993,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017717537004500628,
"signal/format_reward/centered_abs_mean": 0.0056640625,
"signal/format_reward/group_std_mean": 0.016236505843698977,
"signal/format_reward/group_zero_std_frac": 0.909375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.038613373041152955,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00283203125,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2941046953201294,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3701900064945221,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4015364408493042,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02941046915948391,
"step": 70
},
{
"calibration/aurc": 0.3330424907967232,
"calibration/batch_distribution_entropy": 0.9678999886467642,
"calibration/buffer_distribution_entropy": 0.9849417318548467,
"calibration/confidence_entropy": 0.47655475637605293,
"calibration/coverage@0%": 0.00546875,
"calibration/coverage@1%": 0.00546875,
"calibration/coverage@10%": 0.1390625,
"calibration/coverage@15%": 0.1921875,
"calibration/coverage@20%": 0.2574938725490196,
"calibration/coverage@25%": 0.31924479166666664,
"calibration/coverage@30%": 0.4216636029411765,
"calibration/coverage@5%": 0.100390625,
"calibration/ece": 0.1458545029740423,
"calibration/mean_confidence": 0.5140984260384581,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00126953125,
"completions/max_length": 785.6,
"completions/max_terminated_length": 785.6,
"completions/mean_length": 225.28369140625,
"completions/mean_terminated_length": 225.57015075683594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 97.8,
"epoch": 0.24,
"grad_norm": 0.006990624126046896,
"learning_rate": 1e-06,
"loss": -0.0024,
"num_tokens": 251392652.0,
"reward": 0.9652495741844177,
"reward_std": 0.10869128853082657,
"rewards/accuracy_reward": 0.5564453125,
"rewards/batch_coverage_0": 0.34254276752471924,
"rewards/batch_coverage_1": 0.34254276752471924,
"rewards/batch_coverage_10": 0.366526848077774,
"rewards/batch_coverage_15": 0.3773797333240509,
"rewards/batch_coverage_20": 0.3843431532382965,
"rewards/batch_coverage_25": 0.391269850730896,
"rewards/batch_coverage_5": 0.3533271372318268,
"rewards/brier_reward": 0.778117573261261,
"rewards/confidence_uniqueness_reward": 0.9503395080566406,
"rewards/format_reward": 0.9986328125,
"rewards/frontier_entropy_batch_reward": -0.21713619232177733,
"signal/accuracy_reward/centered_abs_mean": 0.121923828125,
"signal/accuracy_reward/group_std_mean": 0.1602822333574295,
"signal/accuracy_reward/group_zero_std_frac": 0.546875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8842084884643555,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0609619140625,
"signal/advantage_abs_mean": 0.7662800312042236,
"signal/advantage_pre_scale_abs_mean": 0.08325667977333069,
"signal/advantage_pre_scale_std": 0.13016353249549867,
"signal/advantage_std": 0.9830474019050598,
"signal/batch_coverage_0/centered_abs_mean": 0.18846372067928313,
"signal/batch_coverage_0/group_std_mean": 0.23571240305900573,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03953521251678467,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0026950312312692404,
"signal/batch_coverage_1/centered_abs_mean": 0.18846372067928313,
"signal/batch_coverage_1/group_std_mean": 0.23571240305900573,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03953521251678467,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0026950312312692404,
"signal/batch_coverage_10/centered_abs_mean": 0.19527166485786437,
"signal/batch_coverage_10/group_std_mean": 0.24427315294742585,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.040969235450029375,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0027923848014324904,
"signal/batch_coverage_15/centered_abs_mean": 0.19935429692268372,
"signal/batch_coverage_15/group_std_mean": 0.2496377408504486,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04183716475963593,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002850766479969025,
"signal/batch_coverage_20/centered_abs_mean": 0.20147362351417542,
"signal/batch_coverage_20/group_std_mean": 0.2527039110660553,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04230139851570129,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0028810727875679732,
"signal/batch_coverage_25/centered_abs_mean": 0.2088294118642807,
"signal/batch_coverage_25/group_std_mean": 0.26152198314666747,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04382089376449585,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002986260550096631,
"signal/batch_coverage_5/centered_abs_mean": 0.19283068180084229,
"signal/batch_coverage_5/group_std_mean": 0.24108233153820038,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04044611379504204,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0027574787847697733,
"signal/brier_reward/centered_abs_mean": 0.15715266466140748,
"signal/brier_reward/group_std_mean": 0.20129252970218658,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2302556663751602,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01571526676416397,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015452152304351329,
"signal/confidence_uniqueness_reward/group_std_mean": 0.023464472219347953,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022645176202058793,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015452152118086814,
"signal/format_reward/centered_abs_mean": 0.00264892578125,
"signal/format_reward/group_std_mean": 0.007733980193734169,
"signal/format_reward/group_zero_std_frac": 0.95625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01969538666307926,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001324462890625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2867373704910278,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36399008631706237,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4199453890323639,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02867373712360859,
"step": 75
},
{
"calibration/aurc": 0.26090237641990177,
"calibration/batch_distribution_entropy": 0.9734002526952843,
"calibration/buffer_distribution_entropy": 0.9856670911764873,
"calibration/confidence_entropy": 0.46283773268623846,
"calibration/coverage@0%": 0.0140625,
"calibration/coverage@1%": 0.0140625,
"calibration/coverage@10%": 0.24609375,
"calibration/coverage@15%": 0.369921875,
"calibration/coverage@20%": 0.440234375,
"calibration/coverage@25%": 0.480078125,
"calibration/coverage@30%": 0.7054106531311154,
"calibration/coverage@5%": 0.090625,
"calibration/ece": 0.12688025020767357,
"calibration/mean_confidence": 0.5268819480757656,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0009765625,
"completions/max_length": 798.2,
"completions/max_terminated_length": 798.2,
"completions/mean_length": 221.2,
"completions/mean_terminated_length": 221.41583557128905,
"completions/min_length": 0.0,
"completions/min_terminated_length": 97.8,
"epoch": 0.256,
"grad_norm": 0.007445762399584055,
"learning_rate": 1e-06,
"loss": -0.0042,
"num_tokens": 268712556.0,
"reward": 0.9550706624984742,
"reward_std": 0.10047266483306885,
"rewards/accuracy_reward": 0.53466796875,
"rewards/batch_coverage_0": 0.35365639328956605,
"rewards/batch_coverage_1": 0.35365639328956605,
"rewards/batch_coverage_10": 0.398507171869278,
"rewards/batch_coverage_15": 0.40682188868522645,
"rewards/batch_coverage_20": 0.41643165946006777,
"rewards/batch_coverage_25": 0.4207874059677124,
"rewards/batch_coverage_5": 0.38032680153846743,
"rewards/brier_reward": 0.7843676567077636,
"rewards/confidence_uniqueness_reward": 0.9491483807563782,
"rewards/format_reward": 0.9990234375,
"rewards/frontier_entropy_batch_reward": -0.2416835457086563,
"signal/accuracy_reward/centered_abs_mean": 0.110015869140625,
"signal/accuracy_reward/group_std_mean": 0.1448248639702797,
"signal/accuracy_reward/group_zero_std_frac": 0.584375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.894112491607666,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0550079345703125,
"signal/advantage_abs_mean": 0.7612926959991455,
"signal/advantage_pre_scale_abs_mean": 0.0766275018453598,
"signal/advantage_pre_scale_std": 0.12247075140476227,
"signal/advantage_std": 0.9828827261924744,
"signal/batch_coverage_0/centered_abs_mean": 0.1827174574136734,
"signal/batch_coverage_0/group_std_mean": 0.2315205842256546,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04265338107943535,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.002612859569489956,
"signal/batch_coverage_1/centered_abs_mean": 0.1827174574136734,
"signal/batch_coverage_1/group_std_mean": 0.2315205842256546,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04265338107943535,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.002612859569489956,
"signal/batch_coverage_10/centered_abs_mean": 0.1913502722978592,
"signal/batch_coverage_10/group_std_mean": 0.24344587028026582,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04468824490904808,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.002736308891326189,
"signal/batch_coverage_15/centered_abs_mean": 0.19306427836418152,
"signal/batch_coverage_15/group_std_mean": 0.2460806131362915,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.045067351311445236,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0027608192525804044,
"signal/batch_coverage_20/centered_abs_mean": 0.19285837113857268,
"signal/batch_coverage_20/group_std_mean": 0.24617846310138702,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.045022976398468015,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0027578747365623714,
"signal/batch_coverage_25/centered_abs_mean": 0.19478027522563934,
"signal/batch_coverage_25/group_std_mean": 0.24878009259700776,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04542726948857308,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002785358065739274,
"signal/batch_coverage_5/centered_abs_mean": 0.19009583592414855,
"signal/batch_coverage_5/group_std_mean": 0.24117153286933898,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.044343578070402144,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002718370407819748,
"signal/brier_reward/centered_abs_mean": 0.14258549511432647,
"signal/brier_reward/group_std_mean": 0.18335953056812287,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2328980028629303,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014258549734950065,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.016221878677606584,
"signal/confidence_uniqueness_reward/group_std_mean": 0.023178667202591895,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026576806232333182,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016221878584474325,
"signal/format_reward/centered_abs_mean": 0.00189208984375,
"signal/format_reward/group_std_mean": 0.005524271540343762,
"signal/format_reward/group_zero_std_frac": 0.96875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.015373882651329041,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30239012837409973,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37879385948181155,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4947479128837585,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030239013954997063,
"step": 80
},
{
"calibration/aurc": 0.35560353793626626,
"calibration/batch_distribution_entropy": 0.9819774641556854,
"calibration/buffer_distribution_entropy": 0.986498195012946,
"calibration/confidence_entropy": 0.4855185970826679,
"calibration/coverage@0%": 0.015255017674878168,
"calibration/coverage@1%": 0.015255017674878168,
"calibration/coverage@10%": 0.14282666748206133,
"calibration/coverage@15%": 0.2285940257952496,
"calibration/coverage@20%": 0.2971340736637121,
"calibration/coverage@25%": 0.34332502650032615,
"calibration/coverage@30%": 0.3945764864164844,
"calibration/coverage@5%": 0.06731295086527762,
"calibration/ece": 0.131870000208108,
"calibration/mean_confidence": 0.502433525846682,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0021484375,
"completions/max_length": 762.2,
"completions/max_terminated_length": 762.2,
"completions/mean_length": 230.17255859375,
"completions/mean_terminated_length": 230.6666259765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 103.8,
"epoch": 0.272,
"grad_norm": 0.006748313549906015,
"learning_rate": 1e-06,
"loss": -0.0084,
"num_tokens": 286035219.0,
"reward": 0.9507703185081482,
"reward_std": 0.10085932165384293,
"rewards/accuracy_reward": 0.5171875,
"rewards/batch_coverage_0": 0.34573878049850465,
"rewards/batch_coverage_1": 0.34573878049850465,
"rewards/batch_coverage_10": 0.3782036781311035,
"rewards/batch_coverage_15": 0.39221201539039613,
"rewards/batch_coverage_20": 0.39762226939201356,
"rewards/batch_coverage_25": 0.400358122587204,
"rewards/batch_coverage_5": 0.36215776205062866,
"rewards/brier_reward": 0.7837765336036682,
"rewards/confidence_uniqueness_reward": 0.9518365740776062,
"rewards/format_reward": 0.9978515625,
"rewards/frontier_entropy_batch_reward": -0.1780557692050934,
"signal/accuracy_reward/centered_abs_mean": 0.10745849609375,
"signal/accuracy_reward/group_std_mean": 0.1406096488237381,
"signal/accuracy_reward/group_zero_std_frac": 0.60625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8927412509918213,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.053729248046875,
"signal/advantage_abs_mean": 0.757426643371582,
"signal/advantage_pre_scale_abs_mean": 0.07568480670452118,
"signal/advantage_pre_scale_std": 0.12425664216279983,
"signal/advantage_std": 0.9828558802604676,
"signal/batch_coverage_0/centered_abs_mean": 0.16644979119300843,
"signal/batch_coverage_0/group_std_mean": 0.21065367460250856,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03954842537641525,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0023802319541573525,
"signal/batch_coverage_1/centered_abs_mean": 0.16644979119300843,
"signal/batch_coverage_1/group_std_mean": 0.21065367460250856,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03954842537641525,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0023802319541573525,
"signal/batch_coverage_10/centered_abs_mean": 0.17243632078170776,
"signal/batch_coverage_10/group_std_mean": 0.21887100636959075,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04097995311021805,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.002465839311480522,
"signal/batch_coverage_15/centered_abs_mean": 0.1725291758775711,
"signal/batch_coverage_15/group_std_mean": 0.22031511068344117,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.041006506979465486,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.00246716714464128,
"signal/batch_coverage_20/centered_abs_mean": 0.17189605236053468,
"signal/batch_coverage_20/group_std_mean": 0.22016339004039764,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04085053354501724,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.002458113431930542,
"signal/batch_coverage_25/centered_abs_mean": 0.17436989545822143,
"signal/batch_coverage_25/group_std_mean": 0.22325910925865172,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.041441477835178375,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0024934894870966675,
"signal/batch_coverage_5/centered_abs_mean": 0.17018766105175018,
"signal/batch_coverage_5/group_std_mean": 0.21510340869426728,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04042254015803337,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0024336835369467734,
"signal/brier_reward/centered_abs_mean": 0.1363950252532959,
"signal/brier_reward/group_std_mean": 0.1755893498659134,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.22622712850570678,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01363950278609991,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01479104645550251,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02456299029290676,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024606984853744508,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014791046734899283,
"signal/format_reward/centered_abs_mean": 0.00413818359375,
"signal/format_reward/group_std_mean": 0.011480780877172947,
"signal/format_reward/group_zero_std_frac": 0.9375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.03427073359489441,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.002069091796875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24672031104564668,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.31942119598388674,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.41071382761001585,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02467203177511692,
"step": 85
},
{
"calibration/aurc": 0.292581212532501,
"calibration/batch_distribution_entropy": 0.9773755694865226,
"calibration/buffer_distribution_entropy": 0.987680797883996,
"calibration/confidence_entropy": 0.4689914363857097,
"calibration/coverage@0%": 0.01918194032750086,
"calibration/coverage@1%": 0.01918194032750086,
"calibration/coverage@10%": 0.15126897891005714,
"calibration/coverage@15%": 0.1884479772217106,
"calibration/coverage@20%": 0.22212513729806993,
"calibration/coverage@25%": 0.3336199835242316,
"calibration/coverage@30%": 0.4916374863301485,
"calibration/coverage@5%": 0.042665306276620234,
"calibration/ece": 0.1441450143384424,
"calibration/mean_confidence": 0.5331028913418352,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00205078125,
"completions/max_length": 935.4,
"completions/max_terminated_length": 935.4,
"completions/mean_length": 227.60908203125,
"completions/mean_terminated_length": 228.07676391601564,
"completions/min_length": 0.0,
"completions/min_terminated_length": 110.8,
"epoch": 0.288,
"grad_norm": 0.007270668167620897,
"learning_rate": 1e-06,
"loss": -0.0069,
"num_tokens": 303324112.0,
"reward": 0.9605869770050048,
"reward_std": 0.10319101065397263,
"rewards/accuracy_reward": 0.5412109375,
"rewards/batch_coverage_0": 0.3360299587249756,
"rewards/batch_coverage_1": 0.3360299587249756,
"rewards/batch_coverage_10": 0.3981724143028259,
"rewards/batch_coverage_15": 0.40614657998085024,
"rewards/batch_coverage_20": 0.41246947050094607,
"rewards/batch_coverage_25": 0.4149442493915558,
"rewards/batch_coverage_5": 0.3723661780357361,
"rewards/brier_reward": 0.7803151965141296,
"rewards/confidence_uniqueness_reward": 0.9507789015769958,
"rewards/format_reward": 0.9978515625,
"rewards/frontier_entropy_batch_reward": -0.20322760939598083,
"signal/accuracy_reward/centered_abs_mean": 0.108984375,
"signal/accuracy_reward/group_std_mean": 0.1481735274195671,
"signal/accuracy_reward/group_zero_std_frac": 0.553125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8605485916137695,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0544921875,
"signal/advantage_abs_mean": 0.739544403553009,
"signal/advantage_pre_scale_abs_mean": 0.07533028572797776,
"signal/advantage_pre_scale_std": 0.12444878071546554,
"signal/advantage_std": 0.982927656173706,
"signal/batch_coverage_0/centered_abs_mean": 0.15977685451507567,
"signal/batch_coverage_0/group_std_mean": 0.20317367017269133,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03621991276741028,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.002284808969125152,
"signal/batch_coverage_1/centered_abs_mean": 0.15977685451507567,
"signal/batch_coverage_1/group_std_mean": 0.20317367017269133,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03621991276741028,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.002284808969125152,
"signal/batch_coverage_10/centered_abs_mean": 0.17815843522548674,
"signal/batch_coverage_10/group_std_mean": 0.22847844064235687,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04042218551039696,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0025476655922830106,
"signal/batch_coverage_15/centered_abs_mean": 0.18043112754821777,
"signal/batch_coverage_15/group_std_mean": 0.2313321739435196,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04092831686139107,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.00258016511797905,
"signal/batch_coverage_20/centered_abs_mean": 0.18301819264888763,
"signal/batch_coverage_20/group_std_mean": 0.23511843979358674,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04153538718819618,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.002617160230875015,
"signal/batch_coverage_25/centered_abs_mean": 0.1800421804189682,
"signal/batch_coverage_25/group_std_mean": 0.23162541687488555,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04084615483880043,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0025746031664311887,
"signal/batch_coverage_5/centered_abs_mean": 0.17082005143165588,
"signal/batch_coverage_5/group_std_mean": 0.21806212067604064,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03876911178231239,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0024427266791462897,
"signal/brier_reward/centered_abs_mean": 0.13430711179971694,
"signal/brier_reward/group_std_mean": 0.17480315566062926,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21253706514835358,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013430711254477501,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015577200055122375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.025766569748520853,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024803223088383675,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015577200334519147,
"signal/format_reward/centered_abs_mean": 0.004150390625,
"signal/format_reward/group_std_mean": 0.011817089095711709,
"signal/format_reward/group_zero_std_frac": 0.934375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.03272081278264523,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0020751953125,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27212412357330323,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3471516013145447,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.43434072136878965,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027212412655353548,
"step": 90
},
{
"calibration/aurc": 0.2630640235337947,
"calibration/batch_distribution_entropy": 0.964956944555903,
"calibration/buffer_distribution_entropy": 0.9880598273968533,
"calibration/confidence_entropy": 0.4528542483114846,
"calibration/coverage@0%": 0.02978320095161352,
"calibration/coverage@1%": 0.02978320095161352,
"calibration/coverage@10%": 0.14730286635202025,
"calibration/coverage@15%": 0.22133225893096964,
"calibration/coverage@20%": 0.285966002839492,
"calibration/coverage@25%": 0.5375250374122251,
"calibration/coverage@30%": 0.6992985687425655,
"calibration/coverage@5%": 0.08540424388933655,
"calibration/ece": 0.10603972376971078,
"calibration/mean_confidence": 0.5529362740863469,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00166015625,
"completions/max_length": 921.4,
"completions/max_terminated_length": 921.4,
"completions/mean_length": 224.75595703125,
"completions/mean_terminated_length": 225.1295593261719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 102.2,
"epoch": 0.304,
"grad_norm": 0.00729918060824275,
"learning_rate": 1e-06,
"loss": -0.0048,
"num_tokens": 320555565.0,
"reward": 0.9541943192481994,
"reward_std": 0.10027135163545609,
"rewards/accuracy_reward": 0.5359375,
"rewards/batch_coverage_0": 0.34543625116348264,
"rewards/batch_coverage_1": 0.34543625116348264,
"rewards/batch_coverage_10": 0.38589795827865603,
"rewards/batch_coverage_15": 0.3934217095375061,
"rewards/batch_coverage_20": 0.4026259660720825,
"rewards/batch_coverage_25": 0.4048985719680786,
"rewards/batch_coverage_5": 0.36726756691932677,
"rewards/brier_reward": 0.7719451546669006,
"rewards/confidence_uniqueness_reward": 0.9500939846038818,
"rewards/format_reward": 0.99833984375,
"rewards/frontier_entropy_batch_reward": -0.22971556186676026,
"signal/accuracy_reward/centered_abs_mean": 0.1065185546875,
"signal/accuracy_reward/group_std_mean": 0.14071860015392304,
"signal/accuracy_reward/group_zero_std_frac": 0.59375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.86371351480484,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05325927734375,
"signal/advantage_abs_mean": 0.7625090956687928,
"signal/advantage_pre_scale_abs_mean": 0.07567221075296401,
"signal/advantage_pre_scale_std": 0.12226974666118622,
"signal/advantage_std": 0.9828738808631897,
"signal/batch_coverage_0/centered_abs_mean": 0.1651271402835846,
"signal/batch_coverage_0/group_std_mean": 0.20846222341060638,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.038963142409920694,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0023613180965185165,
"signal/batch_coverage_1/centered_abs_mean": 0.1651271402835846,
"signal/batch_coverage_1/group_std_mean": 0.20846222341060638,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.038963142409920694,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0023613180965185165,
"signal/batch_coverage_10/centered_abs_mean": 0.17702984809875488,
"signal/batch_coverage_10/group_std_mean": 0.22490586936473847,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.041466080397367475,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.002531526843085885,
"signal/batch_coverage_15/centered_abs_mean": 0.17536805868148803,
"signal/batch_coverage_15/group_std_mean": 0.22317201495170594,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04111176505684853,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0025077632162719965,
"signal/batch_coverage_20/centered_abs_mean": 0.17413158118724822,
"signal/batch_coverage_20/group_std_mean": 0.2223893940448761,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04070866405963898,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0024900815915316343,
"signal/batch_coverage_25/centered_abs_mean": 0.17070959508419037,
"signal/batch_coverage_25/group_std_mean": 0.21875600814819335,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.039876680821180344,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002441147156059742,
"signal/batch_coverage_5/centered_abs_mean": 0.17223967611789703,
"signal/batch_coverage_5/group_std_mean": 0.21767829358577728,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0404712088406086,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002463027322664857,
"signal/brier_reward/centered_abs_mean": 0.1345919907093048,
"signal/brier_reward/group_std_mean": 0.17346469461917877,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21938477456569672,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013459199480712414,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015962999872863294,
"signal/confidence_uniqueness_reward/group_std_mean": 0.024774506315588952,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026566693931818007,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015963000478222966,
"signal/format_reward/centered_abs_mean": 0.003204345703125,
"signal/format_reward/group_std_mean": 0.0090549532789737,
"signal/format_reward/group_zero_std_frac": 0.95,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.025845942366868258,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0016021728515625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28979048132896423,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3667328774929047,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4797462522983551,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028979047015309334,
"step": 95
},
{
"calibration/aurc": 0.21323978911458616,
"calibration/batch_distribution_entropy": 0.9754091138368974,
"calibration/buffer_distribution_entropy": 0.9878890606584776,
"calibration/confidence_entropy": 0.4707118432324401,
"calibration/coverage@0%": 0.04375458659491194,
"calibration/coverage@1%": 0.05078583659491194,
"calibration/coverage@10%": 0.33517153864970645,
"calibration/coverage@15%": 0.42579959637964776,
"calibration/coverage@20%": 0.556768285225049,
"calibration/coverage@25%": 0.6622897810665362,
"calibration/coverage@30%": 0.730684319960861,
"calibration/coverage@5%": 0.19531708659491193,
"calibration/ece": 0.15941081118757158,
"calibration/mean_confidence": 0.5433335774149639,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0009765625,
"completions/max_length": 901.0,
"completions/max_terminated_length": 901.0,
"completions/mean_length": 216.8244140625,
"completions/mean_terminated_length": 217.03798828125,
"completions/min_length": 20.0,
"completions/min_terminated_length": 106.0,
"epoch": 0.32,
"grad_norm": 0.0067256358452141285,
"learning_rate": 1e-06,
"loss": -0.0037,
"num_tokens": 337864551.0,
"reward": 0.9643454909324646,
"reward_std": 0.08362279087305069,
"rewards/accuracy_reward": 0.5408203125,
"rewards/batch_coverage_0": 0.40262268781661986,
"rewards/batch_coverage_1": 0.40262268781661986,
"rewards/batch_coverage_10": 0.4292051911354065,
"rewards/batch_coverage_15": 0.43654964566230775,
"rewards/batch_coverage_20": 0.4390207827091217,
"rewards/batch_coverage_25": 0.4412606120109558,
"rewards/batch_coverage_5": 0.4163574159145355,
"rewards/brier_reward": 0.7956306457519531,
"rewards/confidence_uniqueness_reward": 0.9509375929832459,
"rewards/format_reward": 0.9990234375,
"rewards/frontier_entropy_batch_reward": -0.22670443654060363,
"signal/accuracy_reward/centered_abs_mean": 0.0785888671875,
"signal/accuracy_reward/group_std_mean": 0.10648886114358902,
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7305195569992066,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03929443359375,
"signal/advantage_abs_mean": 0.76407390832901,
"signal/advantage_pre_scale_abs_mean": 0.06325442418456077,
"signal/advantage_pre_scale_std": 0.1052427500486374,
"signal/advantage_std": 0.9826571702957153,
"signal/batch_coverage_0/centered_abs_mean": 0.14719704687595367,
"signal/batch_coverage_0/group_std_mean": 0.1912107139825821,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03920513764023781,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021049177274107935,
"signal/batch_coverage_1/centered_abs_mean": 0.14719704687595367,
"signal/batch_coverage_1/group_std_mean": 0.1912107139825821,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03920513764023781,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021049177274107935,
"signal/batch_coverage_10/centered_abs_mean": 0.15465619862079621,
"signal/batch_coverage_10/group_std_mean": 0.2020500361919403,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04116538017988205,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.002211583498865366,
"signal/batch_coverage_15/centered_abs_mean": 0.1558520555496216,
"signal/batch_coverage_15/group_std_mean": 0.20389397144317628,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04150298237800598,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0022286843974143266,
"signal/batch_coverage_20/centered_abs_mean": 0.15506626963615416,
"signal/batch_coverage_20/group_std_mean": 0.20311373770236968,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04127655476331711,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0022174476645886896,
"signal/batch_coverage_25/centered_abs_mean": 0.15622910261154174,
"signal/batch_coverage_25/group_std_mean": 0.2045728623867035,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04156334474682808,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002234076149761677,
"signal/batch_coverage_5/centered_abs_mean": 0.15119654536247254,
"signal/batch_coverage_5/group_std_mean": 0.19683580100536346,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04023168459534645,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0021621106658130883,
"signal/brier_reward/centered_abs_mean": 0.11154842674732209,
"signal/brier_reward/group_std_mean": 0.1480434626340866,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20754066705703736,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011154843121767044,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014669315330684185,
"signal/confidence_uniqueness_reward/group_std_mean": 0.020008804649114607,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02726968452334404,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014669314958155155,
"signal/format_reward/centered_abs_mean": 0.00177001953125,
"signal/format_reward/group_std_mean": 0.003914954699575901,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016244655288755894,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000885009765625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28998995423316953,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36479984521865844,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5398689031600952,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028998995944857598,
"step": 100
},
{
"epoch": 0.32,
"eval_calibration/aurc": 0.4818202421936853,
"eval_calibration/batch_distribution_entropy": 0.9161004968263402,
"eval_calibration/buffer_distribution_entropy": 0.9880535073418304,
"eval_calibration/confidence_entropy": 0.4630894061433339,
"eval_calibration/coverage@0%": 0.046875,
"eval_calibration/coverage@1%": 0.046875,
"eval_calibration/coverage@10%": 0.046875,
"eval_calibration/coverage@15%": 0.046875,
"eval_calibration/coverage@20%": 0.0859375,
"eval_calibration/coverage@25%": 0.1328125,
"eval_calibration/coverage@30%": 0.2265625,
"eval_calibration/coverage@5%": 0.046875,
"eval_calibration/ece": 0.23660033525497487,
"eval_calibration/mean_confidence": 0.46879126181649067,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 449.25,
"eval_completions/max_terminated_length": 449.25,
"eval_completions/mean_length": 221.74993133544922,
"eval_completions/mean_terminated_length": 221.74993133544922,
"eval_completions/min_length": 111.75,
"eval_completions/min_terminated_length": 111.75,
"eval_loss": 0.0,
"eval_num_tokens": 337864551.0,
"eval_reward": 0.7951662689447403,
"eval_reward_std": 0.22639942914247513,
"eval_rewards/accuracy_reward": 0.421875,
"eval_rewards/batch_coverage_0": 0.1622033342719078,
"eval_rewards/batch_coverage_1": 0.1622033342719078,
"eval_rewards/batch_coverage_10": 0.16187801584601402,
"eval_rewards/batch_coverage_15": 0.1481264792382717,
"eval_rewards/batch_coverage_20": 0.13171643018722534,
"eval_rewards/batch_coverage_25": 0.11638518050312996,
"eval_rewards/batch_coverage_5": 0.1622033342719078,
"eval_rewards/brier_reward": 0.7932350784540176,
"eval_rewards/confidence_uniqueness_reward": 0.899658203125,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 22.8241,
"eval_samples_per_second": 21.907,
"eval_signal/accuracy_reward/centered_abs_mean": 0.47021484375,
"eval_signal/accuracy_reward/group_std_mean": 0.4921262636780739,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0398327857255936,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.235107421875,
"eval_signal/advantage_abs_mean": 0.9330623000860214,
"eval_signal/advantage_pre_scale_abs_mean": 0.21175596863031387,
"eval_signal/advantage_pre_scale_std": 0.22392144799232483,
"eval_signal/advantage_std": 0.9876809418201447,
"eval_signal/batch_coverage_0/centered_abs_mean": 0.30005283653736115,
"eval_signal/batch_coverage_0/group_std_mean": 0.3642084077000618,
"eval_signal/batch_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.018966381903737783,
"eval_signal/batch_coverage_0/weight": 0.014299999922513962,
"eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.004290755605325103,
"eval_signal/batch_coverage_1/centered_abs_mean": 0.30005283653736115,
"eval_signal/batch_coverage_1/group_std_mean": 0.3642084077000618,
"eval_signal/batch_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.018966381903737783,
"eval_signal/batch_coverage_1/weight": 0.014299999922513962,
"eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.004290755605325103,
"eval_signal/batch_coverage_10/centered_abs_mean": 0.2983648404479027,
"eval_signal/batch_coverage_10/group_std_mean": 0.36220937967300415,
"eval_signal/batch_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.018857899587601423,
"eval_signal/batch_coverage_10/weight": 0.014299999922513962,
"eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.004266617470420897,
"eval_signal/batch_coverage_15/centered_abs_mean": 0.2660781927406788,
"eval_signal/batch_coverage_15/group_std_mean": 0.3240368664264679,
"eval_signal/batch_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.016829160042107105,
"eval_signal/batch_coverage_15/weight": 0.014299999922513962,
"eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.003804918087553233,
"eval_signal/batch_coverage_20/centered_abs_mean": 0.2327834814786911,
"eval_signal/batch_coverage_20/group_std_mean": 0.28654681891202927,
"eval_signal/batch_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.014746756991371512,
"eval_signal/batch_coverage_20/weight": 0.014299999922513962,
"eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.0033288037520833313,
"eval_signal/batch_coverage_25/centered_abs_mean": 0.19945495203137398,
"eval_signal/batch_coverage_25/group_std_mean": 0.248090460896492,
"eval_signal/batch_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012607906712219119,
"eval_signal/batch_coverage_25/weight": 0.014299999922513962,
"eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.0028522057691589,
"eval_signal/batch_coverage_5/centered_abs_mean": 0.30005283653736115,
"eval_signal/batch_coverage_5/group_std_mean": 0.3642084077000618,
"eval_signal/batch_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.018966381903737783,
"eval_signal/batch_coverage_5/weight": 0.014299999922513962,
"eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.004290755605325103,
"eval_signal/brier_reward/centered_abs_mean": 0.19953547045588493,
"eval_signal/brier_reward/group_std_mean": 0.24987618252635002,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08829442970454693,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019953548442572355,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.040130615234375,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04819970764219761,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.017783273942768574,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0040130615234375,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.175,
"step": 100
},
{
"calibration/aurc": 0.2685604926137561,
"calibration/batch_distribution_entropy": 0.9600400894076688,
"calibration/buffer_distribution_entropy": 0.9896598366446046,
"calibration/confidence_entropy": 0.47332780847956474,
"calibration/coverage@0%": 0.029347411481715973,
"calibration/coverage@1%": 0.029347411481715973,
"calibration/coverage@10%": 0.09506503371896705,
"calibration/coverage@15%": 0.1670885644497525,
"calibration/coverage@20%": 0.29349015890602814,
"calibration/coverage@25%": 0.5120462328767122,
"calibration/coverage@30%": 0.6164479640315029,
"calibration/coverage@5%": 0.046541038932696364,
"calibration/ece": 0.12776021875721216,
"calibration/mean_confidence": 0.5172638957764836,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00107421875,
"completions/max_length": 835.2,
"completions/max_terminated_length": 835.2,
"completions/mean_length": 219.88203125,
"completions/mean_terminated_length": 220.11773986816405,
"completions/min_length": 0.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.336,
"grad_norm": 0.006541461683809757,
"learning_rate": 1e-06,
"loss": -0.0028,
"num_tokens": 354838575.0,
"reward": 0.9650772452354431,
"reward_std": 0.09242034554481507,
"rewards/accuracy_reward": 0.5484375,
"rewards/batch_coverage_0": 0.3875566303730011,
"rewards/batch_coverage_1": 0.3875566303730011,
"rewards/batch_coverage_10": 0.4233329236507416,
"rewards/batch_coverage_15": 0.43131263852119445,
"rewards/batch_coverage_20": 0.43660197257995603,
"rewards/batch_coverage_25": 0.43793233633041384,
"rewards/batch_coverage_5": 0.4037653625011444,
"rewards/brier_reward": 0.7984023332595825,
"rewards/confidence_uniqueness_reward": 0.9482085943222046,
"rewards/format_reward": 0.9986328125,
"rewards/frontier_entropy_batch_reward": -0.24704246819019318,
"signal/accuracy_reward/centered_abs_mean": 0.088916015625,
"signal/accuracy_reward/group_std_mean": 0.12266767919063568,
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7738893389701843,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0444580078125,
"signal/advantage_abs_mean": 0.7501138925552369,
"signal/advantage_pre_scale_abs_mean": 0.06785393953323364,
"signal/advantage_pre_scale_std": 0.11415418684482574,
"signal/advantage_std": 0.9827425599098205,
"signal/batch_coverage_0/centered_abs_mean": 0.1362660273909569,
"signal/batch_coverage_0/group_std_mean": 0.17492769360542298,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03478074930608273,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019486041273921728,
"signal/batch_coverage_1/centered_abs_mean": 0.1362660273909569,
"signal/batch_coverage_1/group_std_mean": 0.17492769360542298,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03478074930608273,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019486041273921728,
"signal/batch_coverage_10/centered_abs_mean": 0.14739642143249512,
"signal/batch_coverage_10/group_std_mean": 0.19052909314632416,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03750689923763275,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021077687153592704,
"signal/batch_coverage_15/centered_abs_mean": 0.14911286830902098,
"signal/batch_coverage_15/group_std_mean": 0.19245946705341338,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03794231489300728,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002132314071059227,
"signal/batch_coverage_20/centered_abs_mean": 0.153034570813179,
"signal/batch_coverage_20/group_std_mean": 0.1976184368133545,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03890108093619347,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.002188394358381629,
"signal/batch_coverage_25/centered_abs_mean": 0.15436229705810547,
"signal/batch_coverage_25/group_std_mean": 0.19924021661281585,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03921979740262031,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002207380859181285,
"signal/batch_coverage_5/centered_abs_mean": 0.14039334505796433,
"signal/batch_coverage_5/group_std_mean": 0.18035671412944793,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03583626076579094,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020076247630640863,
"signal/brier_reward/centered_abs_mean": 0.11269704401493072,
"signal/brier_reward/group_std_mean": 0.14758805930614471,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1994762033224106,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011269704438745975,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015488607808947563,
"signal/confidence_uniqueness_reward/group_std_mean": 0.023617172613739967,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0275675717741251,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015488607343286276,
"signal/format_reward/centered_abs_mean": 0.00264892578125,
"signal/format_reward/group_std_mean": 0.007733980286866426,
"signal/format_reward/group_zero_std_frac": 0.95625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.023863587900996207,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001324462890625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28287087082862855,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.357461279630661,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5021488547325135,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028287087753415107,
"step": 105
},
{
"calibration/aurc": 0.27019286399420744,
"calibration/batch_distribution_entropy": 0.9298135335293258,
"calibration/buffer_distribution_entropy": 0.9932549531185441,
"calibration/confidence_entropy": 0.4141606750385892,
"calibration/coverage@0%": 0.07255500783934542,
"calibration/coverage@1%": 0.07255500783934542,
"calibration/coverage@10%": 0.31170399103579516,
"calibration/coverage@15%": 0.39551763876797075,
"calibration/coverage@20%": 0.44607006463126186,
"calibration/coverage@25%": 0.5083359075246159,
"calibration/coverage@30%": 0.5835207135923078,
"calibration/coverage@5%": 0.1879378452586406,
"calibration/ece": 0.11614041938554136,
"calibration/mean_confidence": 0.4979168544314918,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00166015625,
"completions/max_length": 948.2,
"completions/max_terminated_length": 948.2,
"completions/mean_length": 233.632421875,
"completions/mean_terminated_length": 234.02719421386718,
"completions/min_length": 21.8,
"completions/min_terminated_length": 111.0,
"epoch": 0.352,
"grad_norm": 0.00914387870579958,
"learning_rate": 1e-06,
"loss": -0.0094,
"num_tokens": 372491387.0,
"reward": 0.9311908602714538,
"reward_std": 0.09648803919553757,
"rewards/accuracy_reward": 0.48447265625,
"rewards/batch_coverage_0": 0.38543524742126467,
"rewards/batch_coverage_1": 0.38543524742126467,
"rewards/batch_coverage_10": 0.4238883852958679,
"rewards/batch_coverage_15": 0.42850934267044066,
"rewards/batch_coverage_20": 0.43640496134757994,
"rewards/batch_coverage_25": 0.44086662530899046,
"rewards/batch_coverage_5": 0.41723122596740725,
"rewards/brier_reward": 0.7983362555503846,
"rewards/confidence_uniqueness_reward": 0.9474457859992981,
"rewards/format_reward": 0.99814453125,
"rewards/frontier_entropy_batch_reward": -0.2642006158828735,
"signal/accuracy_reward/centered_abs_mean": 0.099725341796875,
"signal/accuracy_reward/group_std_mean": 0.1285892456769943,
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8679127812385559,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0498626708984375,
"signal/advantage_abs_mean": 0.7641012310981751,
"signal/advantage_pre_scale_abs_mean": 0.0740143045783043,
"signal/advantage_pre_scale_std": 0.11975871622562409,
"signal/advantage_std": 0.9827718019485474,
"signal/batch_coverage_0/centered_abs_mean": 0.14283648133277893,
"signal/batch_coverage_0/group_std_mean": 0.18093341290950776,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03563266433775425,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.002042561722919345,
"signal/batch_coverage_1/centered_abs_mean": 0.14283648133277893,
"signal/batch_coverage_1/group_std_mean": 0.18093341290950776,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03563266433775425,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.002042561722919345,
"signal/batch_coverage_10/centered_abs_mean": 0.15249529480934143,
"signal/batch_coverage_10/group_std_mean": 0.19459120631217958,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.038044761121273044,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.002180682681500912,
"signal/batch_coverage_15/centered_abs_mean": 0.15441205203533173,
"signal/batch_coverage_15/group_std_mean": 0.19762863516807555,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03852261155843735,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002208092389628291,
"signal/batch_coverage_20/centered_abs_mean": 0.1582718998193741,
"signal/batch_coverage_20/group_std_mean": 0.2032813996076584,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03947751969099045,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.002263288199901581,
"signal/batch_coverage_25/centered_abs_mean": 0.1593154788017273,
"signal/batch_coverage_25/group_std_mean": 0.20504648387432098,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.039732877910137174,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0022782113403081892,
"signal/batch_coverage_5/centered_abs_mean": 0.15034010708332063,
"signal/batch_coverage_5/group_std_mean": 0.19178448617458344,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03750232979655266,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002149863541126251,
"signal/brier_reward/centered_abs_mean": 0.1160271480679512,
"signal/brier_reward/group_std_mean": 0.14954084753990174,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20231397449970245,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011602715216577053,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.017225971072912218,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02534388713538647,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030038028210401534,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001722597167827189,
"signal/format_reward/centered_abs_mean": 0.003497314453125,
"signal/format_reward/group_std_mean": 0.008663824107497931,
"signal/format_reward/group_zero_std_frac": 0.95625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.030156026408076287,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0017486572265625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30242173075675965,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37537208795547483,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5284075975418091,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03024217374622822,
"step": 110
},
{
"calibration/aurc": 0.32133141547128696,
"calibration/batch_distribution_entropy": 0.9596077713361109,
"calibration/buffer_distribution_entropy": 0.9951584201390368,
"calibration/confidence_entropy": 0.4573241392343905,
"calibration/coverage@0%": 0.030078125,
"calibration/coverage@1%": 0.030078125,
"calibration/coverage@10%": 0.11796875,
"calibration/coverage@15%": 0.183984375,
"calibration/coverage@20%": 0.3625,
"calibration/coverage@25%": 0.406640625,
"calibration/coverage@30%": 0.462109375,
"calibration/coverage@5%": 0.0546875,
"calibration/ece": 0.131855792371392,
"calibration/mean_confidence": 0.49068173815499366,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0009765625,
"completions/max_length": 929.6,
"completions/max_terminated_length": 929.6,
"completions/mean_length": 238.81806640625,
"completions/mean_terminated_length": 239.05218811035155,
"completions/min_length": 0.0,
"completions/min_terminated_length": 105.2,
"epoch": 0.368,
"grad_norm": 0.007342960219830275,
"learning_rate": 1e-06,
"loss": 0.0014,
"num_tokens": 390002356.0,
"reward": 0.9485709071159363,
"reward_std": 0.08474339246749878,
"rewards/accuracy_reward": 0.51044921875,
"rewards/batch_coverage_0": 0.39238272309303285,
"rewards/batch_coverage_1": 0.39238272309303285,
"rewards/batch_coverage_10": 0.4261459052562714,
"rewards/batch_coverage_15": 0.4329572141170502,
"rewards/batch_coverage_20": 0.43899570107460023,
"rewards/batch_coverage_25": 0.440056574344635,
"rewards/batch_coverage_5": 0.41612568497657776,
"rewards/brier_reward": 0.8009498476982116,
"rewards/confidence_uniqueness_reward": 0.9502113699913025,
"rewards/format_reward": 0.9990234375,
"rewards/frontier_entropy_batch_reward": -0.23309923112392425,
"signal/accuracy_reward/centered_abs_mean": 0.082708740234375,
"signal/accuracy_reward/group_std_mean": 0.11486676782369613,
"signal/accuracy_reward/group_zero_std_frac": 0.65,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8056630492210388,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0413543701171875,
"signal/advantage_abs_mean": 0.7527588725090026,
"signal/advantage_pre_scale_abs_mean": 0.06306558772921562,
"signal/advantage_pre_scale_std": 0.10630078315734863,
"signal/advantage_std": 0.9825676798820495,
"signal/batch_coverage_0/centered_abs_mean": 0.139876489341259,
"signal/batch_coverage_0/group_std_mean": 0.17783224880695342,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03907948359847069,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0020002338802441954,
"signal/batch_coverage_1/centered_abs_mean": 0.139876489341259,
"signal/batch_coverage_1/group_std_mean": 0.17783224880695342,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03907948359847069,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0020002338802441954,
"signal/batch_coverage_10/centered_abs_mean": 0.14939309060573577,
"signal/batch_coverage_10/group_std_mean": 0.19122098684310912,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04172439575195312,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021363211795687675,
"signal/batch_coverage_15/centered_abs_mean": 0.15046306550502778,
"signal/batch_coverage_15/group_std_mean": 0.1928224891424179,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04200417771935463,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0021516217617318033,
"signal/batch_coverage_20/centered_abs_mean": 0.15322479605674744,
"signal/batch_coverage_20/group_std_mean": 0.1975228577852249,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04276901260018349,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021911146119236945,
"signal/batch_coverage_25/centered_abs_mean": 0.1543430656194687,
"signal/batch_coverage_25/group_std_mean": 0.19888520240783691,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04310151115059853,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002207105793058872,
"signal/batch_coverage_5/centered_abs_mean": 0.14643795192241668,
"signal/batch_coverage_5/group_std_mean": 0.18699788451194763,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.040923018008470535,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002094062673859298,
"signal/brier_reward/centered_abs_mean": 0.10724746435880661,
"signal/brier_reward/group_std_mean": 0.140754859149456,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20941689908504485,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010724746435880662,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014008820429444313,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02020746245980263,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027339933440089226,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014008820755407215,
"signal/format_reward/centered_abs_mean": 0.0018798828125,
"signal/format_reward/group_std_mean": 0.0051879632286727425,
"signal/format_reward/group_zero_std_frac": 0.971875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01840692777186632,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00093994140625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2798162639141083,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35085987448692324,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5465050220489502,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027981626987457275,
"step": 115
},
{
"calibration/aurc": 0.30339147131241734,
"calibration/batch_distribution_entropy": 0.9563765817949786,
"calibration/buffer_distribution_entropy": 0.9954902797933238,
"calibration/confidence_entropy": 0.44445111541300547,
"calibration/coverage@0%": 0.08164674947087455,
"calibration/coverage@1%": 0.13672487447087453,
"calibration/coverage@10%": 0.27734987447087456,
"calibration/coverage@15%": 0.30899049947087454,
"calibration/coverage@20%": 0.3421974466333012,
"calibration/coverage@25%": 0.3824318216333012,
"calibration/coverage@30%": 0.46457662214675943,
"calibration/coverage@5%": 0.22500612447087454,
"calibration/ece": 0.15227414567768388,
"calibration/mean_confidence": 0.4745961122709959,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00107421875,
"completions/max_length": 861.6,
"completions/max_terminated_length": 861.6,
"completions/mean_length": 231.5802734375,
"completions/mean_terminated_length": 231.82743225097656,
"completions/min_length": 19.8,
"completions/min_terminated_length": 105.0,
"epoch": 0.384,
"grad_norm": 0.006414992269128561,
"learning_rate": 1e-06,
"loss": -0.0003,
"num_tokens": 407230250.0,
"reward": 0.9628369092941285,
"reward_std": 0.08602556586265564,
"rewards/accuracy_reward": 0.540234375,
"rewards/batch_coverage_0": 0.3981931030750275,
"rewards/batch_coverage_1": 0.3981931030750275,
"rewards/batch_coverage_10": 0.43429911732673643,
"rewards/batch_coverage_15": 0.4370169997215271,
"rewards/batch_coverage_20": 0.4419554710388184,
"rewards/batch_coverage_25": 0.44801422357559206,
"rewards/batch_coverage_5": 0.4173740684986115,
"rewards/brier_reward": 0.8051711320877075,
"rewards/confidence_uniqueness_reward": 0.9488577246665955,
"rewards/format_reward": 0.998828125,
"rewards/frontier_entropy_batch_reward": -0.24640387594699859,
"signal/accuracy_reward/centered_abs_mean": 0.090576171875,
"signal/accuracy_reward/group_std_mean": 0.12041936963796615,
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8770391941070557,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0452880859375,
"signal/advantage_abs_mean": 0.7650493621826172,
"signal/advantage_pre_scale_abs_mean": 0.06524530351161957,
"signal/advantage_pre_scale_std": 0.10862657576799392,
"signal/advantage_std": 0.982580029964447,
"signal/batch_coverage_0/centered_abs_mean": 0.14904703795909882,
"signal/batch_coverage_0/group_std_mean": 0.1901983439922333,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.041442494839429855,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021313726902008057,
"signal/batch_coverage_1/centered_abs_mean": 0.14904703795909882,
"signal/batch_coverage_1/group_std_mean": 0.1901983439922333,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.041442494839429855,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021313726902008057,
"signal/batch_coverage_10/centered_abs_mean": 0.1577294200658798,
"signal/batch_coverage_10/group_std_mean": 0.20299181640148162,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04381066411733627,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0022555307485163214,
"signal/batch_coverage_15/centered_abs_mean": 0.1570695459842682,
"signal/batch_coverage_15/group_std_mean": 0.20258375704288484,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04360567554831505,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0022460945881903173,
"signal/batch_coverage_20/centered_abs_mean": 0.1576721489429474,
"signal/batch_coverage_20/group_std_mean": 0.20356932580471038,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.043791229277849196,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.002254711743444204,
"signal/batch_coverage_25/centered_abs_mean": 0.16238428950309752,
"signal/batch_coverage_25/group_std_mean": 0.209498855471611,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04516012445092201,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0023220953065901996,
"signal/batch_coverage_5/centered_abs_mean": 0.15410350263118744,
"signal/batch_coverage_5/group_std_mean": 0.19714944660663605,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04280069917440414,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002203680109232664,
"signal/brier_reward/centered_abs_mean": 0.1032984122633934,
"signal/brier_reward/group_std_mean": 0.1353047162294388,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20016059279441833,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010329841263592243,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015139190666377544,
"signal/confidence_uniqueness_reward/group_std_mean": 0.022230926714837552,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029904866591095924,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001513919117860496,
"signal/format_reward/centered_abs_mean": 0.00225830078125,
"signal/format_reward/group_std_mean": 0.0062928176019340755,
"signal/format_reward/group_zero_std_frac": 0.965625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.02292755376547575,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001129150390625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28261598348617556,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3594805419445038,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5510689675807953,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02826159931719303,
"step": 120
},
{
"calibration/aurc": 0.3887606816696102,
"calibration/batch_distribution_entropy": 0.9813992911999595,
"calibration/buffer_distribution_entropy": 0.9948418303817892,
"calibration/confidence_entropy": 0.492348453496467,
"calibration/coverage@0%": 0.005473354616895874,
"calibration/coverage@1%": 0.005473354616895874,
"calibration/coverage@10%": 0.007426479616895874,
"calibration/coverage@15%": 0.014848354616895876,
"calibration/coverage@20%": 0.0964782355108055,
"calibration/coverage@25%": 0.22515502210216107,
"calibration/coverage@30%": 0.3283652075147348,
"calibration/coverage@5%": 0.005473354616895874,
"calibration/ece": 0.14469285404897078,
"calibration/mean_confidence": 0.5267769005041314,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 673.0,
"completions/max_terminated_length": 673.0,
"completions/mean_length": 213.291015625,
"completions/mean_terminated_length": 213.37777099609374,
"completions/min_length": 86.0,
"completions/min_terminated_length": 106.4,
"epoch": 0.4,
"grad_norm": 0.007502261083573103,
"learning_rate": 1e-06,
"loss": -0.0006,
"num_tokens": 424450798.0,
"reward": 0.9512521266937256,
"reward_std": 0.09306368678808212,
"rewards/accuracy_reward": 0.523046875,
"rewards/batch_coverage_0": 0.3664769411087036,
"rewards/batch_coverage_1": 0.3664769411087036,
"rewards/batch_coverage_10": 0.39776320457458497,
"rewards/batch_coverage_15": 0.4013418197631836,
"rewards/batch_coverage_20": 0.41172993183135986,
"rewards/batch_coverage_25": 0.4154829740524292,
"rewards/batch_coverage_5": 0.3841669142246246,
"rewards/brier_reward": 0.792135500907898,
"rewards/confidence_uniqueness_reward": 0.9506557941436767,
"rewards/format_reward": 0.999609375,
"rewards/frontier_entropy_batch_reward": -0.23586300015449524,
"signal/accuracy_reward/centered_abs_mean": 0.10125732421875,
"signal/accuracy_reward/group_std_mean": 0.13127839118242263,
"signal/accuracy_reward/group_zero_std_frac": 0.634375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9214963674545288,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.050628662109375,
"signal/advantage_abs_mean": 0.7701701760292053,
"signal/advantage_pre_scale_abs_mean": 0.07243436127901078,
"signal/advantage_pre_scale_std": 0.11784504354000092,
"signal/advantage_std": 0.9826903700828552,
"signal/batch_coverage_0/centered_abs_mean": 0.13559473752975465,
"signal/batch_coverage_0/group_std_mean": 0.1722516745328903,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.035601938143372536,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.001939004845917225,
"signal/batch_coverage_1/centered_abs_mean": 0.13559473752975465,
"signal/batch_coverage_1/group_std_mean": 0.1722516745328903,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.035601938143372536,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.001939004845917225,
"signal/batch_coverage_10/centered_abs_mean": 0.14347952902317046,
"signal/batch_coverage_10/group_std_mean": 0.18395382463932036,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03765551820397377,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.002051757252775133,
"signal/batch_coverage_15/centered_abs_mean": 0.1431223601102829,
"signal/batch_coverage_15/group_std_mean": 0.18346399664878846,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.037582477927207945,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002046649740077555,
"signal/batch_coverage_20/centered_abs_mean": 0.14731760025024415,
"signal/batch_coverage_20/group_std_mean": 0.18973099887371064,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.038676262646913526,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021066416520625353,
"signal/batch_coverage_25/centered_abs_mean": 0.14638057053089143,
"signal/batch_coverage_25/group_std_mean": 0.18917132019996644,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.038373632729053496,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020932421321049333,
"signal/batch_coverage_5/centered_abs_mean": 0.13988438844680787,
"signal/batch_coverage_5/group_std_mean": 0.17847622334957122,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.036685329675674436,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020003467332571747,
"signal/brier_reward/centered_abs_mean": 0.11065952330827714,
"signal/brier_reward/group_std_mean": 0.14248354136943817,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20170999467372894,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01106595303863287,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013073159195482732,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017292667552828787,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02388041839003563,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013073159847408534,
"signal/format_reward/centered_abs_mean": 0.00074462890625,
"signal/format_reward/group_std_mean": 0.0018734002485871315,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006573101878166199,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27541821002960204,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3459975838661194,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5035546779632568,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027541821449995042,
"step": 125
},
{
"calibration/aurc": 0.27751930971271366,
"calibration/batch_distribution_entropy": 0.9501913731130441,
"calibration/buffer_distribution_entropy": 0.9958043564857535,
"calibration/confidence_entropy": 0.4522949540641205,
"calibration/coverage@0%": 0.021115918542074362,
"calibration/coverage@1%": 0.021115918542074362,
"calibration/coverage@10%": 0.1340845156555773,
"calibration/coverage@15%": 0.1720087756849315,
"calibration/coverage@20%": 0.22007705479452055,
"calibration/coverage@25%": 0.42624067392367904,
"calibration/coverage@30%": 0.6247683769569472,
"calibration/coverage@5%": 0.0973359527886497,
"calibration/ece": 0.10802325633813177,
"calibration/mean_confidence": 0.5223946163504338,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00068359375,
"completions/max_length": 754.0,
"completions/max_terminated_length": 754.0,
"completions/mean_length": 209.23974609375,
"completions/mean_terminated_length": 209.38225402832032,
"completions/min_length": 39.6,
"completions/min_terminated_length": 100.8,
"epoch": 0.416,
"grad_norm": 0.006731382571160793,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 441474597.0,
"reward": 0.954073143005371,
"reward_std": 0.08691072165966034,
"rewards/accuracy_reward": 0.5236328125,
"rewards/batch_coverage_0": 0.40355276465415957,
"rewards/batch_coverage_1": 0.40355276465415957,
"rewards/batch_coverage_10": 0.4401559591293335,
"rewards/batch_coverage_15": 0.4462449550628662,
"rewards/batch_coverage_20": 0.44900824427604674,
"rewards/batch_coverage_25": 0.45447943210601804,
"rewards/batch_coverage_5": 0.42884148359298707,
"rewards/brier_reward": 0.8059401035308837,
"rewards/confidence_uniqueness_reward": 0.9467169284820557,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_entropy_batch_reward": -0.25936628580093385,
"signal/accuracy_reward/centered_abs_mean": 0.0849365234375,
"signal/accuracy_reward/group_std_mean": 0.11482858657836914,
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.780875825881958,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04246826171875,
"signal/advantage_abs_mean": 0.7536841630935669,
"signal/advantage_pre_scale_abs_mean": 0.0655998557806015,
"signal/advantage_pre_scale_std": 0.1087318018078804,
"signal/advantage_std": 0.9826650023460388,
"signal/batch_coverage_0/centered_abs_mean": 0.1364029973745346,
"signal/batch_coverage_0/group_std_mean": 0.1733390212059021,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.036052515357732774,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019505629083141685,
"signal/batch_coverage_1/centered_abs_mean": 0.1364029973745346,
"signal/batch_coverage_1/group_std_mean": 0.1733390212059021,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.036052515357732774,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019505629083141685,
"signal/batch_coverage_10/centered_abs_mean": 0.14312728643417358,
"signal/batch_coverage_10/group_std_mean": 0.18380964994430543,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03788130059838295,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020467202179133893,
"signal/batch_coverage_15/centered_abs_mean": 0.14330851435661315,
"signal/batch_coverage_15/group_std_mean": 0.18487076759338378,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.037964475154876706,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020493117161095144,
"signal/batch_coverage_20/centered_abs_mean": 0.1435598075389862,
"signal/batch_coverage_20/group_std_mean": 0.18555756211280822,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03806317374110222,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.002052905270829797,
"signal/batch_coverage_25/centered_abs_mean": 0.1446547716856003,
"signal/batch_coverage_25/group_std_mean": 0.1876140534877777,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.038419923186302184,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002068563178181648,
"signal/batch_coverage_5/centered_abs_mean": 0.14238941073417663,
"signal/batch_coverage_5/group_std_mean": 0.18227325677871703,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03768376782536507,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020361685194075106,
"signal/brier_reward/centered_abs_mean": 0.10666648000478744,
"signal/brier_reward/group_std_mean": 0.13855001628398894,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19812132120132447,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010666648298501969,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01669100560247898,
"signal/confidence_uniqueness_reward/group_std_mean": 0.022906759381294252,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03130178637802601,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016691005090251564,
"signal/format_reward/centered_abs_mean": 0.001324462890625,
"signal/format_reward/group_std_mean": 0.0038669900968670845,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01281973384320736,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2834647178649902,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3607568025588989,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5266608476638794,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02834647297859192,
"step": 130
},
{
"calibration/aurc": 0.2348983777396676,
"calibration/batch_distribution_entropy": 0.960256795358017,
"calibration/buffer_distribution_entropy": 0.9943258434526356,
"calibration/confidence_entropy": 0.4422803777578306,
"calibration/coverage@0%": 0.0546875,
"calibration/coverage@1%": 0.081640625,
"calibration/coverage@10%": 0.253515625,
"calibration/coverage@15%": 0.32229161570450093,
"calibration/coverage@20%": 0.4437775195694716,
"calibration/coverage@25%": 0.5684663955479452,
"calibration/coverage@30%": 0.6654231898238747,
"calibration/coverage@5%": 0.166796875,
"calibration/ece": 0.1278217715909456,
"calibration/mean_confidence": 0.5304076012280948,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 795.4,
"completions/max_terminated_length": 795.4,
"completions/mean_length": 215.02705078125,
"completions/mean_terminated_length": 215.13265686035157,
"completions/min_length": 21.2,
"completions/min_terminated_length": 101.8,
"epoch": 0.432,
"grad_norm": 0.007011502515524626,
"learning_rate": 1e-06,
"loss": -0.0004,
"num_tokens": 458690810.0,
"reward": 0.9772215843200683,
"reward_std": 0.08537227213382721,
"rewards/accuracy_reward": 0.56171875,
"rewards/batch_coverage_0": 0.429417085647583,
"rewards/batch_coverage_1": 0.429417085647583,
"rewards/batch_coverage_10": 0.45698114633560183,
"rewards/batch_coverage_15": 0.4633796989917755,
"rewards/batch_coverage_20": 0.46579416990280154,
"rewards/batch_coverage_25": 0.46876177191734314,
"rewards/batch_coverage_5": 0.448213255405426,
"rewards/brier_reward": 0.8230858683586121,
"rewards/confidence_uniqueness_reward": 0.9489996194839477,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_entropy_batch_reward": -0.2581829369068146,
"signal/accuracy_reward/centered_abs_mean": 0.08834228515625,
"signal/accuracy_reward/group_std_mean": 0.11721137315034866,
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8492055177688599,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044171142578125,
"signal/advantage_abs_mean": 0.7724653244018554,
"signal/advantage_pre_scale_abs_mean": 0.06635667979717255,
"signal/advantage_pre_scale_std": 0.10899174213409424,
"signal/advantage_std": 0.982582688331604,
"signal/batch_coverage_0/centered_abs_mean": 0.1260914087295532,
"signal/batch_coverage_0/group_std_mean": 0.16242744624614716,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03515940457582474,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018031070707365871,
"signal/batch_coverage_1/centered_abs_mean": 0.1260914087295532,
"signal/batch_coverage_1/group_std_mean": 0.16242744624614716,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03515940457582474,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018031070707365871,
"signal/batch_coverage_10/centered_abs_mean": 0.13288309574127197,
"signal/batch_coverage_10/group_std_mean": 0.17225308418273927,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03706081435084343,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0019002282759174704,
"signal/batch_coverage_15/centered_abs_mean": 0.13253578245639802,
"signal/batch_coverage_15/group_std_mean": 0.17227967381477355,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03687136918306351,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0018952616723254324,
"signal/batch_coverage_20/centered_abs_mean": 0.1341342270374298,
"signal/batch_coverage_20/group_std_mean": 0.17456189095973967,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03733382299542427,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.001918119378387928,
"signal/batch_coverage_25/centered_abs_mean": 0.13528763949871064,
"signal/batch_coverage_25/group_std_mean": 0.17607759833335876,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03762383908033371,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0019346131943166257,
"signal/batch_coverage_5/centered_abs_mean": 0.1318855404853821,
"signal/batch_coverage_5/group_std_mean": 0.17036759853363037,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03678369112312794,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0018859632313251496,
"signal/brier_reward/centered_abs_mean": 0.09676974564790726,
"signal/brier_reward/group_std_mean": 0.1267983391880989,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1873602271080017,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009676975198090076,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014930117689073086,
"signal/confidence_uniqueness_reward/group_std_mean": 0.019978737458586693,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028808726742863656,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014930118340998888,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009044526517391205,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29253311157226564,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3665455937385559,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5665589928627014,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029253310710191726,
"step": 135
},
{
"calibration/aurc": 0.24297186329614986,
"calibration/batch_distribution_entropy": 0.9644706631929182,
"calibration/buffer_distribution_entropy": 0.9927333410589829,
"calibration/confidence_entropy": 0.4905251186083281,
"calibration/coverage@0%": 0.031253822162426614,
"calibration/coverage@1%": 0.031253822162426614,
"calibration/coverage@10%": 0.1747102800880626,
"calibration/coverage@15%": 0.26385380993150687,
"calibration/coverage@20%": 0.3224850171232877,
"calibration/coverage@25%": 0.4940076137475538,
"calibration/coverage@30%": 0.704204378669276,
"calibration/coverage@5%": 0.08285989481409002,
"calibration/ece": 0.10222825601300542,
"calibration/mean_confidence": 0.5681869079373103,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00087890625,
"completions/max_length": 871.4,
"completions/max_terminated_length": 871.4,
"completions/mean_length": 240.14736328125,
"completions/mean_terminated_length": 240.3585632324219,
"completions/min_length": 0.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.448,
"grad_norm": 0.006543621886521578,
"learning_rate": 1e-06,
"loss": -0.0029,
"num_tokens": 476102719.0,
"reward": 0.9629115462303162,
"reward_std": 0.0858668938279152,
"rewards/accuracy_reward": 0.53369140625,
"rewards/batch_coverage_0": 0.39791461229324343,
"rewards/batch_coverage_1": 0.39791461229324343,
"rewards/batch_coverage_10": 0.42843729853630064,
"rewards/batch_coverage_15": 0.43190144896507265,
"rewards/batch_coverage_20": 0.43552638292312623,
"rewards/batch_coverage_25": 0.4384331822395325,
"rewards/batch_coverage_5": 0.41683014035224913,
"rewards/brier_reward": 0.8179983615875244,
"rewards/confidence_uniqueness_reward": 0.9507858991622925,
"rewards/format_reward": 0.99912109375,
"rewards/frontier_entropy_batch_reward": -0.2251463621854782,
"signal/accuracy_reward/centered_abs_mean": 0.086688232421875,
"signal/accuracy_reward/group_std_mean": 0.11191904991865158,
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8156797289848328,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0433441162109375,
"signal/advantage_abs_mean": 0.770751690864563,
"signal/advantage_pre_scale_abs_mean": 0.06612677872180939,
"signal/advantage_pre_scale_std": 0.10868151634931564,
"signal/advantage_std": 0.9826421141624451,
"signal/batch_coverage_0/centered_abs_mean": 0.13506021797657014,
"signal/batch_coverage_0/group_std_mean": 0.16939607560634612,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03622937873005867,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019313612021505833,
"signal/batch_coverage_1/centered_abs_mean": 0.13506021797657014,
"signal/batch_coverage_1/group_std_mean": 0.16939607560634612,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03622937873005867,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019313612021505833,
"signal/batch_coverage_10/centered_abs_mean": 0.14403230249881743,
"signal/batch_coverage_10/group_std_mean": 0.18216087818145751,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03862129971385002,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020596618996933103,
"signal/batch_coverage_15/centered_abs_mean": 0.1450017899274826,
"signal/batch_coverage_15/group_std_mean": 0.18356646001338958,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03888870552182198,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002073525660671294,
"signal/batch_coverage_20/centered_abs_mean": 0.14507793486118317,
"signal/batch_coverage_20/group_std_mean": 0.18390361666679383,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.0389101043343544,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.002074614446610212,
"signal/batch_coverage_25/centered_abs_mean": 0.1421796977519989,
"signal/batch_coverage_25/group_std_mean": 0.18051582276821138,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03810642510652542,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020331696607172487,
"signal/batch_coverage_5/centered_abs_mean": 0.1403295874595642,
"signal/batch_coverage_5/group_std_mean": 0.17662979066371917,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.037645730376243594,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020067131146788595,
"signal/brier_reward/centered_abs_mean": 0.09719461649656295,
"signal/brier_reward/group_std_mean": 0.12743473201990127,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18190354406833648,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009719461761415004,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013948087580502033,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02008185051381588,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02622562162578106,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013948087580502033,
"signal/format_reward/centered_abs_mean": 0.001702880859375,
"signal/format_reward/group_std_mean": 0.004971844423562288,
"signal/format_reward/group_zero_std_frac": 0.971875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016198099590837955,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28304690420627593,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3540629267692566,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5298369646072387,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02830469198524952,
"step": 140
},
{
"calibration/aurc": 0.3804163715227877,
"calibration/batch_distribution_entropy": 0.9761867561052131,
"calibration/buffer_distribution_entropy": 0.9923661442430166,
"calibration/confidence_entropy": 0.4707709836599577,
"calibration/coverage@0%": 0.0023452788649706456,
"calibration/coverage@1%": 0.0023452788649706456,
"calibration/coverage@10%": 0.006251528864970646,
"calibration/coverage@15%": 0.026173403864970645,
"calibration/coverage@20%": 0.10903406311154598,
"calibration/coverage@25%": 0.30165193860078277,
"calibration/coverage@30%": 0.3919398238747554,
"calibration/coverage@5%": 0.0023452788649706456,
"calibration/ece": 0.12226193847865793,
"calibration/mean_confidence": 0.49554941778696,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0015625,
"completions/max_length": 1013.2,
"completions/max_terminated_length": 1013.2,
"completions/mean_length": 261.70986328125,
"completions/mean_terminated_length": 262.12346801757815,
"completions/min_length": 23.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.464,
"grad_norm": 0.007262748200446367,
"learning_rate": 1e-06,
"loss": -0.0074,
"num_tokens": 493953444.0,
"reward": 0.9251135468482972,
"reward_std": 0.08157364279031754,
"rewards/accuracy_reward": 0.46767578125,
"rewards/batch_coverage_0": 0.4047118484973907,
"rewards/batch_coverage_1": 0.4047118484973907,
"rewards/batch_coverage_10": 0.4350186824798584,
"rewards/batch_coverage_15": 0.44130164980888364,
"rewards/batch_coverage_20": 0.4450684428215027,
"rewards/batch_coverage_25": 0.4465096712112427,
"rewards/batch_coverage_5": 0.4212812721729279,
"rewards/brier_reward": 0.7872581958770752,
"rewards/confidence_uniqueness_reward": 0.9485163807868957,
"rewards/format_reward": 0.99833984375,
"rewards/frontier_entropy_batch_reward": -0.24351753890514374,
"signal/accuracy_reward/centered_abs_mean": 0.067730712890625,
"signal/accuracy_reward/group_std_mean": 0.0957780659198761,
"signal/accuracy_reward/group_zero_std_frac": 0.703125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6515132248401642,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0338653564453125,
"signal/advantage_abs_mean": 0.7560706853866577,
"signal/advantage_pre_scale_abs_mean": 0.05970721915364265,
"signal/advantage_pre_scale_std": 0.10258275270462036,
"signal/advantage_std": 0.9825798630714416,
"signal/batch_coverage_0/centered_abs_mean": 0.13802915811538696,
"signal/batch_coverage_0/group_std_mean": 0.17626523077487946,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0383618026971817,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.001973817031830549,
"signal/batch_coverage_1/centered_abs_mean": 0.13802915811538696,
"signal/batch_coverage_1/group_std_mean": 0.17626523077487946,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0383618026971817,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.001973817031830549,
"signal/batch_coverage_10/centered_abs_mean": 0.14598531723022462,
"signal/batch_coverage_10/group_std_mean": 0.18837699592113494,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.040507327765226364,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020875899121165276,
"signal/batch_coverage_15/centered_abs_mean": 0.14799394309520722,
"signal/batch_coverage_15/group_std_mean": 0.1916155368089676,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.041080842912197116,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0021163133904337885,
"signal/batch_coverage_20/centered_abs_mean": 0.14927698969841002,
"signal/batch_coverage_20/group_std_mean": 0.19369731545448304,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04144330024719238,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.002134660817682743,
"signal/batch_coverage_25/centered_abs_mean": 0.14754628241062165,
"signal/batch_coverage_25/group_std_mean": 0.19175436496734619,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04096244126558304,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021099118515849114,
"signal/batch_coverage_5/centered_abs_mean": 0.14244378805160524,
"signal/batch_coverage_5/group_std_mean": 0.18289848864078523,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.039581865072250366,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002036946127191186,
"signal/brier_reward/centered_abs_mean": 0.11198951303958893,
"signal/brier_reward/group_std_mean": 0.1472228556871414,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21724056005477904,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011198951117694377,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01595969293266535,
"signal/confidence_uniqueness_reward/group_std_mean": 0.024591311067342757,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03095446974039078,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001595969288609922,
"signal/format_reward/centered_abs_mean": 0.003204345703125,
"signal/format_reward/group_std_mean": 0.009054953465238214,
"signal/format_reward/group_zero_std_frac": 0.95,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.03110705818980932,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0016021728515625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28415713310241697,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3556749701499939,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5508628249168396,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02841571271419525,
"step": 145
},
{
"calibration/aurc": 0.2458841873288713,
"calibration/batch_distribution_entropy": 0.9603942966500225,
"calibration/buffer_distribution_entropy": 0.9914970698215189,
"calibration/confidence_entropy": 0.4505998554101692,
"calibration/coverage@0%": 0.04422625942500288,
"calibration/coverage@1%": 0.04422625942500288,
"calibration/coverage@10%": 0.2567775813236254,
"calibration/coverage@15%": 0.3147055226200069,
"calibration/coverage@20%": 0.3691025862399754,
"calibration/coverage@25%": 0.4325483541019147,
"calibration/coverage@30%": 0.6278100598115959,
"calibration/coverage@5%": 0.11351677014984074,
"calibration/ece": 0.1281255298427196,
"calibration/mean_confidence": 0.5108871765777298,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003125,
"completions/max_length": 1096.2,
"completions/max_terminated_length": 1096.2,
"completions/mean_length": 270.9138671875,
"completions/mean_terminated_length": 271.7705505371094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 126.2,
"epoch": 0.48,
"grad_norm": 0.006920052692294121,
"learning_rate": 1e-06,
"loss": -0.0053,
"num_tokens": 511775634.0,
"reward": 0.9541318893432618,
"reward_std": 0.09775560349225998,
"rewards/accuracy_reward": 0.53134765625,
"rewards/batch_coverage_0": 0.3797847807407379,
"rewards/batch_coverage_1": 0.3797847807407379,
"rewards/batch_coverage_10": 0.4200979769229889,
"rewards/batch_coverage_15": 0.4258685171604156,
"rewards/batch_coverage_20": 0.432542085647583,
"rewards/batch_coverage_25": 0.43280801773071287,
"rewards/batch_coverage_5": 0.40181149244308473,
"rewards/brier_reward": 0.7927872776985169,
"rewards/confidence_uniqueness_reward": 0.9472802758216858,
"rewards/format_reward": 0.996875,
"rewards/frontier_entropy_batch_reward": -0.2506578862667084,
"signal/accuracy_reward/centered_abs_mean": 0.108282470703125,
"signal/accuracy_reward/group_std_mean": 0.14222493767738342,
"signal/accuracy_reward/group_zero_std_frac": 0.59375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9324786543846131,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0541412353515625,
"signal/advantage_abs_mean": 0.7579309821128846,
"signal/advantage_pre_scale_abs_mean": 0.07357524037361145,
"signal/advantage_pre_scale_std": 0.1220558226108551,
"signal/advantage_std": 0.9827972650527954,
"signal/batch_coverage_0/centered_abs_mean": 0.15051166117191314,
"signal/batch_coverage_0/group_std_mean": 0.1916230082511902,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03701958805322647,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021523167844861744,
"signal/batch_coverage_1/centered_abs_mean": 0.15051166117191314,
"signal/batch_coverage_1/group_std_mean": 0.1916230082511902,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03701958805322647,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021523167844861744,
"signal/batch_coverage_10/centered_abs_mean": 0.15894248485565185,
"signal/batch_coverage_10/group_std_mean": 0.20470293164253234,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03905408829450607,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0022728775627911093,
"signal/batch_coverage_15/centered_abs_mean": 0.15886342227458955,
"signal/batch_coverage_15/group_std_mean": 0.20532366633415222,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03902169317007065,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002271746890619397,
"signal/batch_coverage_20/centered_abs_mean": 0.16236689388751985,
"signal/batch_coverage_20/group_std_mean": 0.21032328605651857,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03986440747976303,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0023218465503305197,
"signal/batch_coverage_25/centered_abs_mean": 0.16170798540115355,
"signal/batch_coverage_25/group_std_mean": 0.2097803145647049,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03969116657972336,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0023124242201447488,
"signal/batch_coverage_5/centered_abs_mean": 0.15605857968330383,
"signal/batch_coverage_5/group_std_mean": 0.20000146925449372,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03839111030101776,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.00223163771443069,
"signal/brier_reward/centered_abs_mean": 0.11516801714897155,
"signal/brier_reward/group_std_mean": 0.150071182847023,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19804134964942932,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01151680201292038,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.018391324393451213,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02773902639746666,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03177299872040749,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001839132490567863,
"signal/format_reward/centered_abs_mean": 0.00570068359375,
"signal/format_reward/group_std_mean": 0.012306397967040538,
"signal/format_reward/group_zero_std_frac": 0.94375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.04965853579342365,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.002850341796875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29401772022247313,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3682851493358612,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5058897852897644,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02940177321434021,
"step": 150
},
{
"epoch": 0.48,
"eval_calibration/aurc": 0.42659577713684294,
"eval_calibration/batch_distribution_entropy": 0.9184063160188691,
"eval_calibration/buffer_distribution_entropy": 0.9908475784106092,
"eval_calibration/confidence_entropy": 0.43509464264808095,
"eval_calibration/coverage@0%": 0.1171875,
"eval_calibration/coverage@1%": 0.1171875,
"eval_calibration/coverage@10%": 0.15625,
"eval_calibration/coverage@15%": 0.15625,
"eval_calibration/coverage@20%": 0.234375,
"eval_calibration/coverage@25%": 0.2578125,
"eval_calibration/coverage@30%": 0.2734375,
"eval_calibration/coverage@5%": 0.1171875,
"eval_calibration/ece": 0.22639856153688892,
"eval_calibration/mean_confidence": 0.5023909767807211,
"eval_completions/clipped_ratio": 0.004108297413793094,
"eval_completions/max_length": 947.25,
"eval_completions/max_terminated_length": 947.25,
"eval_completions/mean_length": 272.63745880126953,
"eval_completions/mean_terminated_length": 273.78321838378906,
"eval_completions/min_length": 68.75,
"eval_completions/min_terminated_length": 135.0,
"eval_loss": 0.0,
"eval_num_tokens": 511775634.0,
"eval_reward": 0.8025836795568466,
"eval_reward_std": 0.2362028956413269,
"eval_rewards/accuracy_reward": 0.439453125,
"eval_rewards/batch_coverage_0": 0.16702717542648315,
"eval_rewards/batch_coverage_1": 0.16702717542648315,
"eval_rewards/batch_coverage_10": 0.16157682612538338,
"eval_rewards/batch_coverage_15": 0.15290211886167526,
"eval_rewards/batch_coverage_20": 0.13275382481515408,
"eval_rewards/batch_coverage_25": 0.12572277709841728,
"eval_rewards/batch_coverage_5": 0.16702717542648315,
"eval_rewards/brier_reward": 0.7974715679883957,
"eval_rewards/confidence_uniqueness_reward": 0.8931373059749603,
"eval_rewards/format_reward": 0.99609375,
"eval_rewards/frontier_entropy_batch_reward": -0.99609375,
"eval_runtime": 49.7568,
"eval_samples_per_second": 10.049,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4735107421875,
"eval_signal/accuracy_reward/group_std_mean": 0.4941246137022972,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0077248513698578,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23675537109375,
"eval_signal/advantage_abs_mean": 0.9267684817314148,
"eval_signal/advantage_pre_scale_abs_mean": 0.21922592446208,
"eval_signal/advantage_pre_scale_std": 0.23409972339868546,
"eval_signal/advantage_std": 0.9876974821090698,
"eval_signal/batch_coverage_0/centered_abs_mean": 0.2995058670639992,
"eval_signal/batch_coverage_0/group_std_mean": 0.3732306435704231,
"eval_signal/batch_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.018284518970176578,
"eval_signal/batch_coverage_0/weight": 0.014299999922513962,
"eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.004282933892682195,
"eval_signal/batch_coverage_1/centered_abs_mean": 0.2995058670639992,
"eval_signal/batch_coverage_1/group_std_mean": 0.3732306435704231,
"eval_signal/batch_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.018284518970176578,
"eval_signal/batch_coverage_1/weight": 0.014299999922513962,
"eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.004282933892682195,
"eval_signal/batch_coverage_10/centered_abs_mean": 0.2808452136814594,
"eval_signal/batch_coverage_10/group_std_mean": 0.34837885946035385,
"eval_signal/batch_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.017164529534056783,
"eval_signal/batch_coverage_10/weight": 0.014299999922513962,
"eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.004016086459159851,
"eval_signal/batch_coverage_15/centered_abs_mean": 0.2556031718850136,
"eval_signal/batch_coverage_15/group_std_mean": 0.31662533432245255,
"eval_signal/batch_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.015653746901080012,
"eval_signal/batch_coverage_15/weight": 0.014299999922513962,
"eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.0036551255034282804,
"eval_signal/batch_coverage_20/centered_abs_mean": 0.21076411753892899,
"eval_signal/batch_coverage_20/group_std_mean": 0.26321645826101303,
"eval_signal/batch_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012889966601505876,
"eval_signal/batch_coverage_20/weight": 0.014299999922513962,
"eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.00301392690744251,
"eval_signal/batch_coverage_25/centered_abs_mean": 0.19837579876184464,
"eval_signal/batch_coverage_25/group_std_mean": 0.2488715946674347,
"eval_signal/batch_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012128992471843958,
"eval_signal/batch_coverage_25/weight": 0.014299999922513962,
"eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.002836774045135826,
"eval_signal/batch_coverage_5/centered_abs_mean": 0.2995058670639992,
"eval_signal/batch_coverage_5/group_std_mean": 0.3732306435704231,
"eval_signal/batch_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.018284518970176578,
"eval_signal/batch_coverage_5/weight": 0.014299999922513962,
"eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.004282933892682195,
"eval_signal/brier_reward/centered_abs_mean": 0.20047112554311752,
"eval_signal/brier_reward/group_std_mean": 0.2557060122489929,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08551956340670586,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.020047113299369812,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04430906008929014,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.061461527831852436,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01891739433631301,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004430905915796757,
"eval_signal/format_reward/centered_abs_mean": 0.007568359375,
"eval_signal/format_reward/group_std_mean": 0.022097086533904076,
"eval_signal/format_reward/group_zero_std_frac": 0.875,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.016403171233832836,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0037841796875,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.007568359375,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.022097086533904076,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.875,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0032806345261633396,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0007568359724245965,
"eval_steps_per_second": 0.08,
"step": 150
},
{
"calibration/aurc": 0.3286843099258942,
"calibration/batch_distribution_entropy": 0.9648263201622242,
"calibration/buffer_distribution_entropy": 0.9903971698614772,
"calibration/confidence_entropy": 0.4572479206610348,
"calibration/coverage@0%": 0.046269167911196556,
"calibration/coverage@1%": 0.05019073653864754,
"calibration/coverage@10%": 0.18040620276119976,
"calibration/coverage@15%": 0.23105071677694142,
"calibration/coverage@20%": 0.32822291412780497,
"calibration/coverage@25%": 0.3756455780018698,
"calibration/coverage@30%": 0.4696543494734263,
"calibration/coverage@5%": 0.11375486057618409,
"calibration/ece": 0.13474628859908178,
"calibration/mean_confidence": 0.5375935124565585,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00205078125,
"completions/max_length": 1224.8,
"completions/max_terminated_length": 1224.8,
"completions/mean_length": 266.94423828125,
"completions/mean_terminated_length": 267.50035400390624,
"completions/min_length": 26.8,
"completions/min_terminated_length": 121.0,
"epoch": 0.496,
"grad_norm": 0.0065423352643847466,
"learning_rate": 1e-06,
"loss": -0.0124,
"num_tokens": 529816983.0,
"reward": 0.9738301515579224,
"reward_std": 0.0878347024321556,
"rewards/accuracy_reward": 0.56259765625,
"rewards/batch_coverage_0": 0.39764232039451597,
"rewards/batch_coverage_1": 0.39764232039451597,
"rewards/batch_coverage_10": 0.4279240250587463,
"rewards/batch_coverage_15": 0.4341652512550354,
"rewards/batch_coverage_20": 0.4424472451210022,
"rewards/batch_coverage_25": 0.4455852508544922,
"rewards/batch_coverage_5": 0.4148296773433685,
"rewards/brier_reward": 0.8039668917655944,
"rewards/confidence_uniqueness_reward": 0.9486234784126282,
"rewards/format_reward": 0.99794921875,
"rewards/frontier_entropy_batch_reward": -0.24033711552619935,
"signal/accuracy_reward/centered_abs_mean": 0.079705810546875,
"signal/accuracy_reward/group_std_mean": 0.10795025080442429,
"signal/accuracy_reward/group_zero_std_frac": 0.678125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.73244309425354,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0398529052734375,
"signal/advantage_abs_mean": 0.7574346184730529,
"signal/advantage_pre_scale_abs_mean": 0.06559118181467057,
"signal/advantage_pre_scale_std": 0.11137249171733857,
"signal/advantage_std": 0.9826851725578308,
"signal/batch_coverage_0/centered_abs_mean": 0.13030258417129517,
"signal/batch_coverage_0/group_std_mean": 0.1680112361907959,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.034121598303318026,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018633269704878331,
"signal/batch_coverage_1/centered_abs_mean": 0.13030258417129517,
"signal/batch_coverage_1/group_std_mean": 0.1680112361907959,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.034121598303318026,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018633269704878331,
"signal/batch_coverage_10/centered_abs_mean": 0.13791993260383606,
"signal/batch_coverage_10/group_std_mean": 0.17841829657554625,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03606404885649681,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0019722550408914686,
"signal/batch_coverage_15/centered_abs_mean": 0.13972941935062408,
"signal/batch_coverage_15/group_std_mean": 0.18158538937568663,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.036530570685863496,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0019981306744739414,
"signal/batch_coverage_20/centered_abs_mean": 0.1417074352502823,
"signal/batch_coverage_20/group_std_mean": 0.18445950746536255,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03704546689987183,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.002026416314765811,
"signal/batch_coverage_25/centered_abs_mean": 0.1412410706281662,
"signal/batch_coverage_25/group_std_mean": 0.18436720073223115,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.036921939253807066,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.00201974727679044,
"signal/batch_coverage_5/centered_abs_mean": 0.13376755714416505,
"signal/batch_coverage_5/group_std_mean": 0.1726322054862976,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03502202108502388,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0019128760090097786,
"signal/brier_reward/centered_abs_mean": 0.10100103318691253,
"signal/brier_reward/group_std_mean": 0.13356745839118958,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18487447798252105,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010100103542208671,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01650053486227989,
"signal/confidence_uniqueness_reward/group_std_mean": 0.026250819489359856,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030377379804849624,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016500535421073437,
"signal/format_reward/centered_abs_mean": 0.003948974609375,
"signal/format_reward/group_std_mean": 0.010928353667259217,
"signal/format_reward/group_zero_std_frac": 0.940625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.03621824383735657,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0019744873046875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28486855030059816,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35629957914352417,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5223815381526947,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028486854583024978,
"step": 155
},
{
"calibration/aurc": 0.250762394647414,
"calibration/batch_distribution_entropy": 0.9787780953518853,
"calibration/buffer_distribution_entropy": 0.9903697964720649,
"calibration/confidence_entropy": 0.49523146648508537,
"calibration/coverage@0%": 0.05824618602362205,
"calibration/coverage@1%": 0.07777743602362205,
"calibration/coverage@10%": 0.34116941437007875,
"calibration/coverage@15%": 0.4302688238188976,
"calibration/coverage@20%": 0.48497170275590556,
"calibration/coverage@25%": 0.5420613927165354,
"calibration/coverage@30%": 0.6202202263779528,
"calibration/coverage@5%": 0.19384227362204726,
"calibration/ece": 0.1276741663789216,
"calibration/mean_confidence": 0.5013257373811151,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00224609375,
"completions/max_length": 1093.2,
"completions/max_terminated_length": 1093.2,
"completions/mean_length": 256.78408203125,
"completions/mean_terminated_length": 257.3715850830078,
"completions/min_length": 22.6,
"completions/min_terminated_length": 120.8,
"epoch": 0.512,
"grad_norm": 0.006810260470956564,
"learning_rate": 1e-06,
"loss": -0.0089,
"num_tokens": 547592116.0,
"reward": 0.9719028115272522,
"reward_std": 0.08941576033830642,
"rewards/accuracy_reward": 0.55048828125,
"rewards/batch_coverage_0": 0.3955975353717804,
"rewards/batch_coverage_1": 0.3955975353717804,
"rewards/batch_coverage_10": 0.43007825016975404,
"rewards/batch_coverage_15": 0.43892077207565305,
"rewards/batch_coverage_20": 0.4460327446460724,
"rewards/batch_coverage_25": 0.4471981167793274,
"rewards/batch_coverage_5": 0.41255890727043154,
"rewards/brier_reward": 0.8169501304626465,
"rewards/confidence_uniqueness_reward": 0.9497070074081421,
"rewards/format_reward": 0.99775390625,
"rewards/frontier_entropy_batch_reward": -0.21297547519207,
"signal/accuracy_reward/centered_abs_mean": 0.089019775390625,
"signal/accuracy_reward/group_std_mean": 0.12106681168079376,
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.841943883895874,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0445098876953125,
"signal/advantage_abs_mean": 0.7453524589538574,
"signal/advantage_pre_scale_abs_mean": 0.06581479609012604,
"signal/advantage_pre_scale_std": 0.11291303932666778,
"signal/advantage_std": 0.9826244473457336,
"signal/batch_coverage_0/centered_abs_mean": 0.13738665282726287,
"signal/batch_coverage_0/group_std_mean": 0.17612952888011932,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.037176710367202756,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.001964629115536809,
"signal/batch_coverage_1/centered_abs_mean": 0.13738665282726287,
"signal/batch_coverage_1/group_std_mean": 0.17612952888011932,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.037176710367202756,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.001964629115536809,
"signal/batch_coverage_10/centered_abs_mean": 0.1476728081703186,
"signal/batch_coverage_10/group_std_mean": 0.1905215263366699,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03995952680706978,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021117211086675524,
"signal/batch_coverage_15/centered_abs_mean": 0.14803148657083512,
"signal/batch_coverage_15/group_std_mean": 0.19160535037517548,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04006317034363747,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0021168502746149896,
"signal/batch_coverage_20/centered_abs_mean": 0.15196847915649414,
"signal/batch_coverage_20/group_std_mean": 0.19699666202068328,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04112867340445518,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021731492131948473,
"signal/batch_coverage_25/centered_abs_mean": 0.15217567086219788,
"signal/batch_coverage_25/group_std_mean": 0.19736577272415162,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04118807390332222,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002176112146116793,
"signal/batch_coverage_5/centered_abs_mean": 0.1416477769613266,
"signal/batch_coverage_5/group_std_mean": 0.18204045295715332,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03832144886255264,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002025563293136656,
"signal/brier_reward/centered_abs_mean": 0.1010772556066513,
"signal/brier_reward/group_std_mean": 0.13320232033729554,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19146246314048768,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010107725858688354,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015872811153531074,
"signal/confidence_uniqueness_reward/group_std_mean": 0.024924156628549098,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030068162456154825,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015872811432927846,
"signal/format_reward/centered_abs_mean": 0.004254150390625,
"signal/format_reward/group_std_mean": 0.010569548420608043,
"signal/format_reward/group_zero_std_frac": 0.946875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.04026953727006912,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0021270751953125,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26627694964408877,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3385903060436249,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5046402394771576,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026627695932984353,
"step": 160
},
{
"calibration/aurc": 0.182792093122934,
"calibration/batch_distribution_entropy": 0.9824451422595312,
"calibration/buffer_distribution_entropy": 0.9911765451685219,
"calibration/confidence_entropy": 0.47350543428585035,
"calibration/coverage@0%": 0.08735411791189954,
"calibration/coverage@1%": 0.10381101869775415,
"calibration/coverage@10%": 0.33079101600119426,
"calibration/coverage@15%": 0.46098051253900374,
"calibration/coverage@20%": 0.5857192411591355,
"calibration/coverage@25%": 0.6962948664239763,
"calibration/coverage@30%": 0.7833417600832082,
"calibration/coverage@5%": 0.2562663569234177,
"calibration/ece": 0.11077890418773365,
"calibration/mean_confidence": 0.5144188109520605,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003125,
"completions/max_length": 1267.0,
"completions/max_terminated_length": 1267.0,
"completions/mean_length": 264.58388671875,
"completions/mean_terminated_length": 265.4162841796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 122.0,
"epoch": 0.528,
"grad_norm": 0.008622488006949425,
"learning_rate": 1e-06,
"loss": -0.0128,
"num_tokens": 565330991.0,
"reward": 0.9707194685935974,
"reward_std": 0.09202601760625839,
"rewards/accuracy_reward": 0.54912109375,
"rewards/batch_coverage_0": 0.42622668743133546,
"rewards/batch_coverage_1": 0.42622668743133546,
"rewards/batch_coverage_10": 0.45363242626190187,
"rewards/batch_coverage_15": 0.46004220843315125,
"rewards/batch_coverage_20": 0.46755346059799197,
"rewards/batch_coverage_25": 0.4684960961341858,
"rewards/batch_coverage_5": 0.4366525709629059,
"rewards/brier_reward": 0.8200646162033081,
"rewards/confidence_uniqueness_reward": 0.9470735311508178,
"rewards/format_reward": 0.996875,
"rewards/frontier_entropy_batch_reward": -0.23877668678760527,
"signal/accuracy_reward/centered_abs_mean": 0.096038818359375,
"signal/accuracy_reward/group_std_mean": 0.12631949186325073,
"signal/accuracy_reward/group_zero_std_frac": 0.65,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9202286958694458,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0480194091796875,
"signal/advantage_abs_mean": 0.7485239148139954,
"signal/advantage_pre_scale_abs_mean": 0.06765339076519013,
"signal/advantage_pre_scale_std": 0.1165284737944603,
"signal/advantage_std": 0.9825997471809387,
"signal/batch_coverage_0/centered_abs_mean": 0.13870301246643066,
"signal/batch_coverage_0/group_std_mean": 0.17862937450408936,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03813367709517479,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019834531703963876,
"signal/batch_coverage_1/centered_abs_mean": 0.13870301246643066,
"signal/batch_coverage_1/group_std_mean": 0.17862937450408936,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03813367709517479,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019834531703963876,
"signal/batch_coverage_10/centered_abs_mean": 0.14713135063648225,
"signal/batch_coverage_10/group_std_mean": 0.19096679985523224,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04046852439641953,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021039783488959072,
"signal/batch_coverage_15/centered_abs_mean": 0.14752067029476165,
"signal/batch_coverage_15/group_std_mean": 0.19163157641887665,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04058904945850372,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.00210954574868083,
"signal/batch_coverage_20/centered_abs_mean": 0.15036363005638123,
"signal/batch_coverage_20/group_std_mean": 0.19588211476802825,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04134969413280487,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.002150199841707945,
"signal/batch_coverage_25/centered_abs_mean": 0.15014814138412474,
"signal/batch_coverage_25/group_std_mean": 0.195842045545578,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04129090085625649,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021471184212714435,
"signal/batch_coverage_5/centered_abs_mean": 0.1420408606529236,
"signal/batch_coverage_5/group_std_mean": 0.18300524055957795,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03905327394604683,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020311842672526836,
"signal/brier_reward/centered_abs_mean": 0.09753091931343079,
"signal/brier_reward/group_std_mean": 0.13158592879772185,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18743386566638948,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009753092005848885,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.017668948322534562,
"signal/confidence_uniqueness_reward/group_std_mean": 0.029466599225997925,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03409051336348057,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017668949207291008,
"signal/format_reward/centered_abs_mean": 0.00595703125,
"signal/format_reward/group_std_mean": 0.015264297416433693,
"signal/format_reward/group_zero_std_frac": 0.921875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.057597226649522784,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.002978515625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27023516297340394,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3435254514217377,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5200891494750977,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027023518458008766,
"step": 165
},
{
"calibration/aurc": 0.20382781310413386,
"calibration/batch_distribution_entropy": 0.9231029218947485,
"calibration/buffer_distribution_entropy": 0.9907112369323047,
"calibration/confidence_entropy": 0.4080044647499128,
"calibration/coverage@0%": 0.022343892028416367,
"calibration/coverage@1%": 0.022343892028416367,
"calibration/coverage@10%": 0.20518062547057977,
"calibration/coverage@15%": 0.3918224197914523,
"calibration/coverage@20%": 0.5747048732359679,
"calibration/coverage@25%": 0.7370451964020966,
"calibration/coverage@30%": 0.804878027001573,
"calibration/coverage@5%": 0.10079797326181579,
"calibration/ece": 0.08342971454497072,
"calibration/mean_confidence": 0.5725822810234342,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00478515625,
"completions/max_length": 1112.0,
"completions/max_terminated_length": 1112.0,
"completions/mean_length": 272.848828125,
"completions/mean_terminated_length": 274.163623046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 122.2,
"epoch": 0.544,
"grad_norm": 0.006315728649497032,
"learning_rate": 1e-06,
"loss": -0.0202,
"num_tokens": 583288547.0,
"reward": 0.9795204758644104,
"reward_std": 0.10076274275779724,
"rewards/accuracy_reward": 0.5869140625,
"rewards/batch_coverage_0": 0.38468037247657777,
"rewards/batch_coverage_1": 0.38468037247657777,
"rewards/batch_coverage_10": 0.4374719798564911,
"rewards/batch_coverage_15": 0.4450931191444397,
"rewards/batch_coverage_20": 0.4514342784881592,
"rewards/batch_coverage_25": 0.45464577078819274,
"rewards/batch_coverage_5": 0.41293213367462156,
"rewards/brier_reward": 0.8001878976821899,
"rewards/confidence_uniqueness_reward": 0.9425620436668396,
"rewards/format_reward": 0.99521484375,
"rewards/frontier_entropy_batch_reward": -0.2830341339111328,
"signal/accuracy_reward/centered_abs_mean": 0.0957275390625,
"signal/accuracy_reward/group_std_mean": 0.13344258964061737,
"signal/accuracy_reward/group_zero_std_frac": 0.596875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.831443476676941,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04786376953125,
"signal/advantage_abs_mean": 0.734432065486908,
"signal/advantage_pre_scale_abs_mean": 0.07164475619792939,
"signal/advantage_pre_scale_std": 0.12459437847137451,
"signal/advantage_std": 0.9827680230140686,
"signal/batch_coverage_0/centered_abs_mean": 0.13583238422870636,
"signal/batch_coverage_0/group_std_mean": 0.1736106514930725,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03401588536798954,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019424030557274818,
"signal/batch_coverage_1/centered_abs_mean": 0.13583238422870636,
"signal/batch_coverage_1/group_std_mean": 0.1736106514930725,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03401588536798954,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019424030557274818,
"signal/batch_coverage_10/centered_abs_mean": 0.1505550354719162,
"signal/batch_coverage_10/group_std_mean": 0.1952440172433853,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.037708821892738345,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.002152937022037804,
"signal/batch_coverage_15/centered_abs_mean": 0.15017966628074647,
"signal/batch_coverage_15/group_std_mean": 0.1952642858028412,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03756738603115082,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002147569367662072,
"signal/batch_coverage_20/centered_abs_mean": 0.15021539926528932,
"signal/batch_coverage_20/group_std_mean": 0.19603228569030762,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03747752532362938,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021480801980942486,
"signal/batch_coverage_25/centered_abs_mean": 0.15165410339832305,
"signal/batch_coverage_25/group_std_mean": 0.19860296845436096,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03792189955711365,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021686537191271784,
"signal/batch_coverage_5/centered_abs_mean": 0.14168917536735534,
"signal/batch_coverage_5/group_std_mean": 0.1822911262512207,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03544792048633098,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020261551951989533,
"signal/brier_reward/centered_abs_mean": 0.11252984702587128,
"signal/brier_reward/group_std_mean": 0.14877772629261016,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19660184383392335,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011252984963357448,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02271880432963371,
"signal/confidence_uniqueness_reward/group_std_mean": 0.037897758185863495,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04011071212589741,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002271880488842726,
"signal/format_reward/centered_abs_mean": 0.008966064453125,
"signal/format_reward/group_std_mean": 0.021273162961006165,
"signal/format_reward/group_zero_std_frac": 0.896875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07927814871072769,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0044830322265625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29769091606140136,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37242411971092226,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5192531406879425,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029769092053174972,
"step": 170
},
{
"calibration/aurc": 0.2208552602109703,
"calibration/batch_distribution_entropy": 0.9657113588875912,
"calibration/buffer_distribution_entropy": 0.9895090286189452,
"calibration/confidence_entropy": 0.47121970246450573,
"calibration/coverage@0%": 0.050205249012967815,
"calibration/coverage@1%": 0.07447139382705784,
"calibration/coverage@10%": 0.37735943223929025,
"calibration/coverage@15%": 0.4405383529760919,
"calibration/coverage@20%": 0.5207640236801371,
"calibration/coverage@25%": 0.5850732988217104,
"calibration/coverage@30%": 0.6418992854569161,
"calibration/coverage@5%": 0.30050195244071853,
"calibration/ece": 0.10489993015645589,
"calibration/mean_confidence": 0.49981221301042106,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0044921875,
"completions/max_length": 1259.0,
"completions/max_terminated_length": 1259.0,
"completions/mean_length": 276.05908203125,
"completions/mean_terminated_length": 277.3196655273438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 113.8,
"epoch": 0.56,
"grad_norm": 0.005405076779425144,
"learning_rate": 1e-06,
"loss": -0.02,
"num_tokens": 600936800.0,
"reward": 0.9639453887939453,
"reward_std": 0.09017050564289093,
"rewards/accuracy_reward": 0.5365234375,
"rewards/batch_coverage_0": 0.41431847810745237,
"rewards/batch_coverage_1": 0.41431847810745237,
"rewards/batch_coverage_10": 0.45128042697906495,
"rewards/batch_coverage_15": 0.45638718008995055,
"rewards/batch_coverage_20": 0.4603978514671326,
"rewards/batch_coverage_25": 0.46246368288993833,
"rewards/batch_coverage_5": 0.44165197014808655,
"rewards/brier_reward": 0.8196909427642822,
"rewards/confidence_uniqueness_reward": 0.9461859464645386,
"rewards/format_reward": 0.9955078125,
"rewards/frontier_entropy_batch_reward": -0.22999619245529174,
"signal/accuracy_reward/centered_abs_mean": 0.07750244140625,
"signal/accuracy_reward/group_std_mean": 0.10900391191244126,
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.696437931060791,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.038751220703125,
"signal/advantage_abs_mean": 0.7308009505271912,
"signal/advantage_pre_scale_abs_mean": 0.06338043585419655,
"signal/advantage_pre_scale_std": 0.1112355962395668,
"signal/advantage_std": 0.9826737999916076,
"signal/batch_coverage_0/centered_abs_mean": 0.13989091217517852,
"signal/batch_coverage_0/group_std_mean": 0.1777060866355896,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03682319894433021,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.002000440075062215,
"signal/batch_coverage_1/centered_abs_mean": 0.13989091217517852,
"signal/batch_coverage_1/group_std_mean": 0.1777060866355896,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03682319894433021,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.002000440075062215,
"signal/batch_coverage_10/centered_abs_mean": 0.15016445517539978,
"signal/batch_coverage_10/group_std_mean": 0.19216148853302,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03952023386955261,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.002147351624444127,
"signal/batch_coverage_15/centered_abs_mean": 0.15080960988998413,
"signal/batch_coverage_15/group_std_mean": 0.19309694170951844,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.039715195447206496,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002156577631831169,
"signal/batch_coverage_20/centered_abs_mean": 0.1491364985704422,
"signal/batch_coverage_20/group_std_mean": 0.19163033962249756,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03939807564020157,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021326518384739757,
"signal/batch_coverage_25/centered_abs_mean": 0.1505493015050888,
"signal/batch_coverage_25/group_std_mean": 0.19344808757305146,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.039786546677351,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021528550423681736,
"signal/batch_coverage_5/centered_abs_mean": 0.1475885719060898,
"signal/batch_coverage_5/group_std_mean": 0.18809866905212402,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03886888325214386,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0021105165826156734,
"signal/brier_reward/centered_abs_mean": 0.10162670761346818,
"signal/brier_reward/group_std_mean": 0.13524822592735292,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18657733201980592,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010162671282887458,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019791874662041666,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03511152528226376,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03647754043340683,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019791874103248118,
"signal/format_reward/centered_abs_mean": 0.008544921875,
"signal/format_reward/group_std_mean": 0.021593831665813922,
"signal/format_reward/group_zero_std_frac": 0.890625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07788022682070732,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0042724609375,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27670013904571533,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3490628838539124,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5116437554359436,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027670014277100564,
"step": 175
},
{
"calibration/aurc": 0.24882780044253244,
"calibration/batch_distribution_entropy": 0.9585615809891646,
"calibration/buffer_distribution_entropy": 0.9896047917697167,
"calibration/confidence_entropy": 0.4563466045343387,
"calibration/coverage@0%": 0.06457074681948144,
"calibration/coverage@1%": 0.06614865608969842,
"calibration/coverage@10%": 0.2500396689641059,
"calibration/coverage@15%": 0.3361376770331115,
"calibration/coverage@20%": 0.45370766312894306,
"calibration/coverage@25%": 0.5531476208199892,
"calibration/coverage@30%": 0.6381766550021126,
"calibration/coverage@5%": 0.18364292817642075,
"calibration/ece": 0.09905783741164291,
"calibration/mean_confidence": 0.5324343582562876,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008984375,
"completions/max_length": 1335.6,
"completions/max_terminated_length": 1335.6,
"completions/mean_length": 276.25419921875,
"completions/mean_terminated_length": 278.7879333496094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 119.4,
"epoch": 0.576,
"grad_norm": 0.005157508887350559,
"learning_rate": 1e-06,
"loss": -0.0411,
"num_tokens": 618952267.0,
"reward": 0.9553242683410644,
"reward_std": 0.09966547191143035,
"rewards/accuracy_reward": 0.53291015625,
"rewards/batch_coverage_0": 0.41381676197052003,
"rewards/batch_coverage_1": 0.41381676197052003,
"rewards/batch_coverage_10": 0.44667200446128846,
"rewards/batch_coverage_15": 0.4532722055912018,
"rewards/batch_coverage_20": 0.45751644372940065,
"rewards/batch_coverage_25": 0.4595360100269318,
"rewards/batch_coverage_5": 0.4303095579147339,
"rewards/brier_reward": 0.8042745471000672,
"rewards/confidence_uniqueness_reward": 0.9406708955764771,
"rewards/format_reward": 0.991015625,
"rewards/frontier_entropy_batch_reward": -0.25104811489582063,
"signal/accuracy_reward/centered_abs_mean": 0.077618408203125,
"signal/accuracy_reward/group_std_mean": 0.10832233279943466,
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.676012110710144,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0388092041015625,
"signal/advantage_abs_mean": 0.7248594284057617,
"signal/advantage_pre_scale_abs_mean": 0.06860132440924645,
"signal/advantage_pre_scale_std": 0.12584017515182494,
"signal/advantage_std": 0.9827669262886047,
"signal/batch_coverage_0/centered_abs_mean": 0.12729153782129288,
"signal/batch_coverage_0/group_std_mean": 0.1640220195055008,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03196005895733833,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018202689243480563,
"signal/batch_coverage_1/centered_abs_mean": 0.12729153782129288,
"signal/batch_coverage_1/group_std_mean": 0.1640220195055008,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03196005895733833,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018202689243480563,
"signal/batch_coverage_10/centered_abs_mean": 0.13694520890712739,
"signal/batch_coverage_10/group_std_mean": 0.17799520790576934,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03435723595321179,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.001958316517993808,
"signal/batch_coverage_15/centered_abs_mean": 0.1368080973625183,
"signal/batch_coverage_15/group_std_mean": 0.17819553017616271,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.0343306839466095,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0019563557812944054,
"signal/batch_coverage_20/centered_abs_mean": 0.13851545453071595,
"signal/batch_coverage_20/group_std_mean": 0.1808491289615631,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.034758536517620085,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0019807710079476236,
"signal/batch_coverage_25/centered_abs_mean": 0.1401458889245987,
"signal/batch_coverage_25/group_std_mean": 0.18298054337501526,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.035132177919149396,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002004086133092642,
"signal/batch_coverage_5/centered_abs_mean": 0.13248585164546967,
"signal/batch_coverage_5/group_std_mean": 0.17141394913196564,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.033234558254480365,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0018945475341752172,
"signal/brier_reward/centered_abs_mean": 0.10222317129373551,
"signal/brier_reward/group_std_mean": 0.13865281045436859,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17827674746513367,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010222317650914192,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027541452273726463,
"signal/confidence_uniqueness_reward/group_std_mean": 0.049568860232830046,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0479873813688755,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002754145348444581,
"signal/format_reward/centered_abs_mean": 0.0166748046875,
"signal/format_reward/group_std_mean": 0.036608771234750745,
"signal/format_reward/group_zero_std_frac": 0.834375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.14411495625972748,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00833740234375,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2816839575767517,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3570773422718048,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.49249241352081297,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028168396279215812,
"step": 180
},
{
"calibration/aurc": 0.26415483037912674,
"calibration/batch_distribution_entropy": 0.9665828472427667,
"calibration/buffer_distribution_entropy": 0.988880973119419,
"calibration/confidence_entropy": 0.451765611193816,
"calibration/coverage@0%": 0.0738741045005866,
"calibration/coverage@1%": 0.14210939861823366,
"calibration/coverage@10%": 0.2970794215182419,
"calibration/coverage@15%": 0.3482093894656264,
"calibration/coverage@20%": 0.5227152602100702,
"calibration/coverage@25%": 0.5803462753799378,
"calibration/coverage@30%": 0.6305694300226883,
"calibration/coverage@5%": 0.21826030199905885,
"calibration/ece": 0.15480829878368255,
"calibration/mean_confidence": 0.48369991354676606,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00869140625,
"completions/max_length": 1418.6,
"completions/max_terminated_length": 1418.6,
"completions/mean_length": 270.27890625,
"completions/mean_terminated_length": 272.670263671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 118.4,
"epoch": 0.592,
"grad_norm": 0.005508663132786751,
"learning_rate": 1e-06,
"loss": -0.0399,
"num_tokens": 636887635.0,
"reward": 0.9581284046173095,
"reward_std": 0.09956228137016296,
"rewards/accuracy_reward": 0.53955078125,
"rewards/batch_coverage_0": 0.4133278489112854,
"rewards/batch_coverage_1": 0.4133278489112854,
"rewards/batch_coverage_10": 0.44717544317245483,
"rewards/batch_coverage_15": 0.4516117811203003,
"rewards/batch_coverage_20": 0.4561953365802765,
"rewards/batch_coverage_25": 0.4570241093635559,
"rewards/batch_coverage_5": 0.43275184035301206,
"rewards/brier_reward": 0.8061846494674683,
"rewards/confidence_uniqueness_reward": 0.9413105726242066,
"rewards/format_reward": 0.99130859375,
"rewards/frontier_entropy_batch_reward": -0.25972045958042145,
"signal/accuracy_reward/centered_abs_mean": 0.088555908203125,
"signal/accuracy_reward/group_std_mean": 0.11966662853956223,
"signal/accuracy_reward/group_zero_std_frac": 0.64375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7483019828796387,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0442779541015625,
"signal/advantage_abs_mean": 0.730834710597992,
"signal/advantage_pre_scale_abs_mean": 0.07057239040732384,
"signal/advantage_pre_scale_std": 0.12372962981462479,
"signal/advantage_std": 0.9827643036842346,
"signal/batch_coverage_0/centered_abs_mean": 0.1381935030221939,
"signal/batch_coverage_0/group_std_mean": 0.17919765114784242,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.034601961821317674,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.001976167061366141,
"signal/batch_coverage_1/centered_abs_mean": 0.1381935030221939,
"signal/batch_coverage_1/group_std_mean": 0.17919765114784242,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.034601961821317674,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.001976167061366141,
"signal/batch_coverage_10/centered_abs_mean": 0.14485456943511962,
"signal/batch_coverage_10/group_std_mean": 0.19011588096618653,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03615746423602104,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020714202895760537,
"signal/batch_coverage_15/centered_abs_mean": 0.14329394698143005,
"signal/batch_coverage_15/group_std_mean": 0.18823909163475036,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.035794655233621596,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002049103332683444,
"signal/batch_coverage_20/centered_abs_mean": 0.14287793934345244,
"signal/batch_coverage_20/group_std_mean": 0.1883653312921524,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.035708678513765336,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020431545563042164,
"signal/batch_coverage_25/centered_abs_mean": 0.1435356080532074,
"signal/batch_coverage_25/group_std_mean": 0.18936876356601715,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03586432859301567,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020525591680780053,
"signal/batch_coverage_5/centered_abs_mean": 0.14292764365673066,
"signal/batch_coverage_5/group_std_mean": 0.18623073101043702,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03571743853390217,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020438652485609055,
"signal/brier_reward/centered_abs_mean": 0.10219440907239914,
"signal/brier_reward/group_std_mean": 0.13913445472717284,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17841624617576599,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010219440795481205,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026426216587424278,
"signal/confidence_uniqueness_reward/group_std_mean": 0.046144616603851316,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04587032720446586,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026426217518746853,
"signal/format_reward/centered_abs_mean": 0.015716552734375,
"signal/format_reward/group_std_mean": 0.03357396759092808,
"signal/format_reward/group_zero_std_frac": 0.846875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1341138780117035,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0078582763671875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2888515055179596,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3633566081523895,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.506644070148468,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02888515144586563,
"step": 185
},
{
"calibration/aurc": 0.19599936396909629,
"calibration/batch_distribution_entropy": 0.9471727617725497,
"calibration/buffer_distribution_entropy": 0.9885040321363311,
"calibration/confidence_entropy": 0.4401101556912829,
"calibration/coverage@0%": 0.12422711072406271,
"calibration/coverage@1%": 0.13250435227854582,
"calibration/coverage@10%": 0.387829161677371,
"calibration/coverage@15%": 0.49070235172216375,
"calibration/coverage@20%": 0.5777213832141951,
"calibration/coverage@25%": 0.642066400167721,
"calibration/coverage@30%": 0.7210643750176389,
"calibration/coverage@5%": 0.2507637549338591,
"calibration/ece": 0.08982427851874988,
"calibration/mean_confidence": 0.47753607038301726,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0154296875,
"completions/max_length": 1333.6,
"completions/max_terminated_length": 1333.6,
"completions/mean_length": 287.44697265625,
"completions/mean_terminated_length": 291.9261962890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 125.6,
"epoch": 0.608,
"grad_norm": 0.004835889209061861,
"learning_rate": 1e-06,
"loss": -0.059,
"num_tokens": 654830580.0,
"reward": 0.9553421139717102,
"reward_std": 0.1128614827990532,
"rewards/accuracy_reward": 0.54013671875,
"rewards/batch_coverage_0": 0.43205046057701113,
"rewards/batch_coverage_1": 0.43205046057701113,
"rewards/batch_coverage_10": 0.45896238684654234,
"rewards/batch_coverage_15": 0.46331515312194826,
"rewards/batch_coverage_20": 0.46950970888137816,
"rewards/batch_coverage_25": 0.4711323916912079,
"rewards/batch_coverage_5": 0.44750791788101196,
"rewards/brier_reward": 0.8163456797599793,
"rewards/confidence_uniqueness_reward": 0.9338125109672546,
"rewards/format_reward": 0.9845703125,
"rewards/frontier_entropy_batch_reward": -0.27422977685928346,
"signal/accuracy_reward/centered_abs_mean": 0.090240478515625,
"signal/accuracy_reward/group_std_mean": 0.1210777685046196,
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7534727334976197,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0451202392578125,
"signal/advantage_abs_mean": 0.7139726996421814,
"signal/advantage_pre_scale_abs_mean": 0.07716420143842698,
"signal/advantage_pre_scale_std": 0.14323937892913818,
"signal/advantage_std": 0.9828421831130981,
"signal/batch_coverage_0/centered_abs_mean": 0.13462951481342317,
"signal/batch_coverage_0/group_std_mean": 0.17377310991287231,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03228494115173817,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019252020167186857,
"signal/batch_coverage_1/centered_abs_mean": 0.13462951481342317,
"signal/batch_coverage_1/group_std_mean": 0.17377310991287231,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03228494115173817,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019252020167186857,
"signal/batch_coverage_10/centered_abs_mean": 0.1441013604402542,
"signal/batch_coverage_10/group_std_mean": 0.18676916658878326,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03450990542769432,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020606494974344967,
"signal/batch_coverage_15/centered_abs_mean": 0.1440788596868515,
"signal/batch_coverage_15/group_std_mean": 0.18676540851593018,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03451910987496376,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020603276556357742,
"signal/batch_coverage_20/centered_abs_mean": 0.14172255396842956,
"signal/batch_coverage_20/group_std_mean": 0.18521577417850493,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03395959660410881,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020266325445845723,
"signal/batch_coverage_25/centered_abs_mean": 0.1393471211194992,
"signal/batch_coverage_25/group_std_mean": 0.182856085896492,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03335911333560944,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0019926637876778843,
"signal/batch_coverage_5/centered_abs_mean": 0.1396337330341339,
"signal/batch_coverage_5/group_std_mean": 0.18044842779636383,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03343283012509346,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0019967623986303805,
"signal/brier_reward/centered_abs_mean": 0.10754630565643311,
"signal/brier_reward/group_std_mean": 0.14926459193229674,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1797608643770218,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010754630714654923,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.036745325475931165,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06699450463056564,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.061678997427225116,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036745326593518256,
"signal/format_reward/centered_abs_mean": 0.02781982421875,
"signal/format_reward/group_std_mean": 0.05717283710837364,
"signal/format_reward/group_zero_std_frac": 0.75,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2333482623100281,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013909912109375,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29573175609111785,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36904223561286925,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4965230643749237,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029573175683617593,
"step": 190
},
{
"calibration/aurc": 0.19647939724342872,
"calibration/batch_distribution_entropy": 0.9693845430347352,
"calibration/buffer_distribution_entropy": 0.9883685744303129,
"calibration/confidence_entropy": 0.45914596746307695,
"calibration/coverage@0%": 0.07503450741176323,
"calibration/coverage@1%": 0.08932022169747753,
"calibration/coverage@10%": 0.33458487664513836,
"calibration/coverage@15%": 0.45889528752778574,
"calibration/coverage@20%": 0.5579918217023472,
"calibration/coverage@25%": 0.6432280912845998,
"calibration/coverage@30%": 0.7236939216479017,
"calibration/coverage@5%": 0.24648746194588217,
"calibration/ece": 0.12530991598547006,
"calibration/mean_confidence": 0.5254408385195928,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01689453125,
"completions/max_length": 1328.0,
"completions/max_terminated_length": 1328.0,
"completions/mean_length": 301.00869140625,
"completions/mean_terminated_length": 306.2028564453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 126.8,
"epoch": 0.624,
"grad_norm": 0.0048108589835464954,
"learning_rate": 1e-06,
"loss": -0.0699,
"num_tokens": 673256813.0,
"reward": 0.9550261974334717,
"reward_std": 0.11695955246686936,
"rewards/accuracy_reward": 0.53720703125,
"rewards/batch_coverage_0": 0.4179235279560089,
"rewards/batch_coverage_1": 0.4179235279560089,
"rewards/batch_coverage_10": 0.45245782732963563,
"rewards/batch_coverage_15": 0.4575910210609436,
"rewards/batch_coverage_20": 0.46224952340126035,
"rewards/batch_coverage_25": 0.46388591527938844,
"rewards/batch_coverage_5": 0.4407612144947052,
"rewards/brier_reward": 0.8106015086174011,
"rewards/confidence_uniqueness_reward": 0.9344212055206299,
"rewards/format_reward": 0.98310546875,
"rewards/frontier_entropy_batch_reward": -0.2414526730775833,
"signal/accuracy_reward/centered_abs_mean": 0.092462158203125,
"signal/accuracy_reward/group_std_mean": 0.1260865330696106,
"signal/accuracy_reward/group_zero_std_frac": 0.625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7334414839744567,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0462310791015625,
"signal/advantage_abs_mean": 0.7067392587661743,
"signal/advantage_pre_scale_abs_mean": 0.0787854865193367,
"signal/advantage_pre_scale_std": 0.14262742698192596,
"signal/advantage_std": 0.9829223394393921,
"signal/batch_coverage_0/centered_abs_mean": 0.15231256783008576,
"signal/batch_coverage_0/group_std_mean": 0.19604605734348296,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03467583134770393,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.002178069809451699,
"signal/batch_coverage_1/centered_abs_mean": 0.15231256783008576,
"signal/batch_coverage_1/group_std_mean": 0.19604605734348296,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03467583134770393,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.002178069809451699,
"signal/batch_coverage_10/centered_abs_mean": 0.16275534629821778,
"signal/batch_coverage_10/group_std_mean": 0.20997639298439025,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.0370567686855793,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0023274014238268137,
"signal/batch_coverage_15/centered_abs_mean": 0.16307214200496672,
"signal/batch_coverage_15/group_std_mean": 0.2107671707868576,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03714829385280609,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0023319316562265156,
"signal/batch_coverage_20/centered_abs_mean": 0.1629529505968094,
"signal/batch_coverage_20/group_std_mean": 0.21109949946403503,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03712652400135994,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0023302271030843256,
"signal/batch_coverage_25/centered_abs_mean": 0.16004838049411774,
"signal/batch_coverage_25/group_std_mean": 0.20808916687965393,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03642952218651772,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002288691932335496,
"signal/batch_coverage_5/centered_abs_mean": 0.15944549441337585,
"signal/batch_coverage_5/group_std_mean": 0.20555467605590821,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.036301738768816,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0022800705395638944,
"signal/brier_reward/centered_abs_mean": 0.11719284057617188,
"signal/brier_reward/group_std_mean": 0.15894266068935395,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18694722950458526,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011719284206628799,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03853886350989342,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07142309993505477,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.061740058660507205,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003853886341676116,
"signal/format_reward/centered_abs_mean": 0.030364990234375,
"signal/format_reward/group_std_mean": 0.06245248168706894,
"signal/format_reward/group_zero_std_frac": 0.728125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2433932214975357,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0151824951171875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2803094804286957,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35295377373695375,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4465214192867279,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02803094796836376,
"step": 195
},
{
"calibration/aurc": 0.22468700970935918,
"calibration/batch_distribution_entropy": 0.9575049131293181,
"calibration/buffer_distribution_entropy": 0.9880757439242596,
"calibration/confidence_entropy": 0.4600728120540899,
"calibration/coverage@0%": 0.09614797444733672,
"calibration/coverage@1%": 0.1255714336918695,
"calibration/coverage@10%": 0.3537883171019663,
"calibration/coverage@15%": 0.43703556909152963,
"calibration/coverage@20%": 0.5238639030046074,
"calibration/coverage@25%": 0.6189128796513287,
"calibration/coverage@30%": 0.6953550272097249,
"calibration/coverage@5%": 0.23783649905851054,
"calibration/ece": 0.13375213264156954,
"calibration/mean_confidence": 0.5644211647402342,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00927734375,
"completions/max_length": 1226.4,
"completions/max_terminated_length": 1226.4,
"completions/mean_length": 291.28251953125,
"completions/mean_terminated_length": 294.0164733886719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.64,
"grad_norm": 0.005845364648848772,
"learning_rate": 1e-06,
"loss": -0.0409,
"num_tokens": 691582234.0,
"reward": 0.9772037029266357,
"reward_std": 0.0971896544098854,
"rewards/accuracy_reward": 0.5740234375,
"rewards/batch_coverage_0": 0.44770985245704653,
"rewards/batch_coverage_1": 0.44770985245704653,
"rewards/batch_coverage_10": 0.47898629307746887,
"rewards/batch_coverage_15": 0.48492227792739867,
"rewards/batch_coverage_20": 0.48741101026535033,
"rewards/batch_coverage_25": 0.48993545174598696,
"rewards/batch_coverage_5": 0.46683679819107055,
"rewards/brier_reward": 0.8279487013816833,
"rewards/confidence_uniqueness_reward": 0.9387916803359986,
"rewards/format_reward": 0.99072265625,
"rewards/frontier_entropy_batch_reward": -0.29083598852157594,
"signal/accuracy_reward/centered_abs_mean": 0.0724365234375,
"signal/accuracy_reward/group_std_mean": 0.10086787045001984,
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6660116136074066,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03621826171875,
"signal/advantage_abs_mean": 0.7312237024307251,
"signal/advantage_pre_scale_abs_mean": 0.0678968921303749,
"signal/advantage_pre_scale_std": 0.1250871941447258,
"signal/advantage_std": 0.9826941609382629,
"signal/batch_coverage_0/centered_abs_mean": 0.1281747579574585,
"signal/batch_coverage_0/group_std_mean": 0.16710792481899261,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03346509672701359,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018328989623114466,
"signal/batch_coverage_1/centered_abs_mean": 0.1281747579574585,
"signal/batch_coverage_1/group_std_mean": 0.16710792481899261,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03346509672701359,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018328989623114466,
"signal/batch_coverage_10/centered_abs_mean": 0.13769169449806212,
"signal/batch_coverage_10/group_std_mean": 0.18058500289916993,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.035960767045617105,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0019689911743625997,
"signal/batch_coverage_15/centered_abs_mean": 0.13795506060123444,
"signal/batch_coverage_15/group_std_mean": 0.1810959905385971,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03599607348442078,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.001972757396288216,
"signal/batch_coverage_20/centered_abs_mean": 0.13890421986579896,
"signal/batch_coverage_20/group_std_mean": 0.1826791375875473,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03625572361052036,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0019863303285092117,
"signal/batch_coverage_25/centered_abs_mean": 0.13745348155498505,
"signal/batch_coverage_25/group_std_mean": 0.18095198273658752,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.035954632610082624,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0019655847921967507,
"signal/batch_coverage_5/centered_abs_mean": 0.1342063993215561,
"signal/batch_coverage_5/group_std_mean": 0.17566960155963898,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03503052368760109,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.001919151423498988,
"signal/brier_reward/centered_abs_mean": 0.09804150611162185,
"signal/brier_reward/group_std_mean": 0.1333908811211586,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.177875754237175,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009804150648415088,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028178646042943,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04986298829317093,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05152831450104713,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028178644832223656,
"signal/format_reward/centered_abs_mean": 0.016888427734375,
"signal/format_reward/group_std_mean": 0.036415594071149825,
"signal/format_reward/group_zero_std_frac": 0.834375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.15302720963954924,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0084442138671875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29316571950912473,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3644020974636078,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5366144418716431,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029316572844982146,
"step": 200
},
{
"epoch": 0.64,
"eval_calibration/aurc": 0.44305710238719687,
"eval_calibration/batch_distribution_entropy": 0.8833562952678735,
"eval_calibration/buffer_distribution_entropy": 0.9873646700855458,
"eval_calibration/confidence_entropy": 0.46500058783929477,
"eval_calibration/coverage@0%": 0.1015625,
"eval_calibration/coverage@1%": 0.1015625,
"eval_calibration/coverage@10%": 0.1015625,
"eval_calibration/coverage@15%": 0.125,
"eval_calibration/coverage@20%": 0.1484375,
"eval_calibration/coverage@25%": 0.2578125,
"eval_calibration/coverage@30%": 0.328125,
"eval_calibration/coverage@5%": 0.1015625,
"eval_calibration/ece": 0.175218991086652,
"eval_calibration/mean_confidence": 0.4452587010502278,
"eval_completions/clipped_ratio": 0.004108297413793094,
"eval_completions/max_length": 791.0,
"eval_completions/max_terminated_length": 791.0,
"eval_completions/mean_length": 302.0158920288086,
"eval_completions/mean_terminated_length": 303.2757110595703,
"eval_completions/min_length": 66.25,
"eval_completions/min_terminated_length": 136.75,
"eval_loss": 0.0,
"eval_num_tokens": 691582234.0,
"eval_reward": 0.7945444732904434,
"eval_reward_std": 0.23430271446704865,
"eval_rewards/accuracy_reward": 0.416015625,
"eval_rewards/batch_coverage_0": 0.19991468638181686,
"eval_rewards/batch_coverage_1": 0.19991468638181686,
"eval_rewards/batch_coverage_10": 0.18121831491589546,
"eval_rewards/batch_coverage_15": 0.16571493819355965,
"eval_rewards/batch_coverage_20": 0.1504622232168913,
"eval_rewards/batch_coverage_25": 0.12405495345592499,
"eval_rewards/batch_coverage_5": 0.19991468638181686,
"eval_rewards/brier_reward": 0.8158539831638336,
"eval_rewards/confidence_uniqueness_reward": 0.8905068784952164,
"eval_rewards/format_reward": 0.99609375,
"eval_rewards/frontier_entropy_batch_reward": -0.99609375,
"eval_runtime": 45.8336,
"eval_samples_per_second": 10.909,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4730224609375,
"eval_signal/accuracy_reward/group_std_mean": 0.49389340728521347,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0130815505981445,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23651123046875,
"eval_signal/advantage_abs_mean": 0.9282233417034149,
"eval_signal/advantage_pre_scale_abs_mean": 0.21765509992837906,
"eval_signal/advantage_pre_scale_std": 0.23197273164987564,
"eval_signal/advantage_std": 0.987695038318634,
"eval_signal/batch_coverage_0/centered_abs_mean": 0.3202243745326996,
"eval_signal/batch_coverage_0/group_std_mean": 0.3795798420906067,
"eval_signal/batch_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.019652313785627484,
"eval_signal/batch_coverage_0/weight": 0.014299999922513962,
"eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.0045792084420099854,
"eval_signal/batch_coverage_1/centered_abs_mean": 0.3202243745326996,
"eval_signal/batch_coverage_1/group_std_mean": 0.3795798420906067,
"eval_signal/batch_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.019652313785627484,
"eval_signal/batch_coverage_1/weight": 0.014299999922513962,
"eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.0045792084420099854,
"eval_signal/batch_coverage_10/centered_abs_mean": 0.2864103727042675,
"eval_signal/batch_coverage_10/group_std_mean": 0.3406292721629143,
"eval_signal/batch_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01759169646538794,
"eval_signal/batch_coverage_10/weight": 0.014299999922513962,
"eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.004095668322406709,
"eval_signal/batch_coverage_15/centered_abs_mean": 0.24582325667142868,
"eval_signal/batch_coverage_15/group_std_mean": 0.2919539734721184,
"eval_signal/batch_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.015123256016522646,
"eval_signal/batch_coverage_15/weight": 0.014299999922513962,
"eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.0035152725758962333,
"eval_signal/batch_coverage_20/centered_abs_mean": 0.21553993970155716,
"eval_signal/batch_coverage_20/group_std_mean": 0.2582678012549877,
"eval_signal/batch_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013275267789140344,
"eval_signal/batch_coverage_20/weight": 0.014299999922513962,
"eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.0030822211410850286,
"eval_signal/batch_coverage_25/centered_abs_mean": 0.17267810739576817,
"eval_signal/batch_coverage_25/group_std_mean": 0.20795756578445435,
"eval_signal/batch_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.010598070221021771,
"eval_signal/batch_coverage_25/weight": 0.014299999922513962,
"eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.002469296916387975,
"eval_signal/batch_coverage_5/centered_abs_mean": 0.3202243745326996,
"eval_signal/batch_coverage_5/group_std_mean": 0.3795798420906067,
"eval_signal/batch_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.019652313785627484,
"eval_signal/batch_coverage_5/weight": 0.014299999922513962,
"eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.0045792084420099854,
"eval_signal/brier_reward/centered_abs_mean": 0.18581411615014076,
"eval_signal/brier_reward/group_std_mean": 0.23873290419578552,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07966925017535686,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01858141180127859,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04621936194598675,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06602886598557234,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019818986766040325,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004621936357580125,
"eval_signal/format_reward/centered_abs_mean": 0.007568359375,
"eval_signal/format_reward/group_std_mean": 0.022097086533904076,
"eval_signal/format_reward/group_zero_std_frac": 0.875,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.016376281157135963,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0037841796875,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.007568359375,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.022097086533904076,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.875,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0032752566039562225,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0007568359724245965,
"eval_steps_per_second": 0.087,
"step": 200
},
{
"calibration/aurc": 0.38683176341008096,
"calibration/batch_distribution_entropy": 0.9805257817315436,
"calibration/buffer_distribution_entropy": 0.9879561129306393,
"calibration/confidence_entropy": 0.49477689589328566,
"calibration/coverage@0%": 0.003923112534299723,
"calibration/coverage@1%": 0.003923112534299723,
"calibration/coverage@10%": 0.012533680048976825,
"calibration/coverage@15%": 0.021561765810288375,
"calibration/coverage@20%": 0.15010488087906798,
"calibration/coverage@25%": 0.3062007384970836,
"calibration/coverage@30%": 0.4057120030238863,
"calibration/coverage@5%": 0.003923112534299723,
"calibration/ece": 0.10678735465585007,
"calibration/mean_confidence": 0.4535179735789959,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0029296875,
"completions/max_length": 1202.4,
"completions/max_terminated_length": 1202.4,
"completions/mean_length": 291.9689453125,
"completions/mean_terminated_length": 292.84205932617186,
"completions/min_length": 0.0,
"completions/min_terminated_length": 115.4,
"epoch": 0.656,
"grad_norm": 0.005559125449508429,
"learning_rate": 1e-06,
"loss": -0.0148,
"num_tokens": 709428540.0,
"reward": 0.9483369946479797,
"reward_std": 0.08952396661043167,
"rewards/accuracy_reward": 0.50654296875,
"rewards/batch_coverage_0": 0.39867132902145386,
"rewards/batch_coverage_1": 0.39867132902145386,
"rewards/batch_coverage_10": 0.4268028914928436,
"rewards/batch_coverage_15": 0.4317127227783203,
"rewards/batch_coverage_20": 0.43587940335273745,
"rewards/batch_coverage_25": 0.4377157986164093,
"rewards/batch_coverage_5": 0.4093734323978424,
"rewards/brier_reward": 0.8042103409767151,
"rewards/confidence_uniqueness_reward": 0.9491950392723083,
"rewards/format_reward": 0.99697265625,
"rewards/frontier_entropy_batch_reward": -0.20786570012569427,
"signal/accuracy_reward/centered_abs_mean": 0.083685302734375,
"signal/accuracy_reward/group_std_mean": 0.1158929094672203,
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7622359275817872,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0418426513671875,
"signal/advantage_abs_mean": 0.7417587041854858,
"signal/advantage_pre_scale_abs_mean": 0.0644775539636612,
"signal/advantage_pre_scale_std": 0.11049062460660934,
"signal/advantage_std": 0.9826884031295776,
"signal/batch_coverage_0/centered_abs_mean": 0.1362083911895752,
"signal/batch_coverage_0/group_std_mean": 0.17440303266048432,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.035590830445289615,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019477800466120242,
"signal/batch_coverage_1/centered_abs_mean": 0.1362083911895752,
"signal/batch_coverage_1/group_std_mean": 0.17440303266048432,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.035590830445289615,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019477800466120242,
"signal/batch_coverage_10/centered_abs_mean": 0.14439732134342192,
"signal/batch_coverage_10/group_std_mean": 0.1864805370569229,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03772576525807381,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.002064881706610322,
"signal/batch_coverage_15/centered_abs_mean": 0.14472835958004,
"signal/batch_coverage_15/group_std_mean": 0.18738215863704683,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03775163665413857,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020696154795587065,
"signal/batch_coverage_20/centered_abs_mean": 0.14556609988212585,
"signal/batch_coverage_20/group_std_mean": 0.18835518658161163,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.037985429912805554,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020815952215343715,
"signal/batch_coverage_25/centered_abs_mean": 0.14521074891090394,
"signal/batch_coverage_25/group_std_mean": 0.18830261528491973,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03792016953229904,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020765136461704968,
"signal/batch_coverage_5/centered_abs_mean": 0.13912782669067383,
"signal/batch_coverage_5/group_std_mean": 0.1785370737314224,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03636412620544434,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0019895279547199607,
"signal/brier_reward/centered_abs_mean": 0.10540544837713242,
"signal/brier_reward/group_std_mean": 0.13930064737796782,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1928122252225876,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010540544986724854,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0166949987411499,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02866690754890442,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030591477081179617,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001669499883428216,
"signal/format_reward/centered_abs_mean": 0.005804443359375,
"signal/format_reward/group_std_mean": 0.015443699806928635,
"signal/format_reward/group_zero_std_frac": 0.91875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05277935266494751,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0029022216796875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2575319021940231,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3293066442012787,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.47052150368690493,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025753191113471983,
"step": 205
},
{
"calibration/aurc": 0.29186164006012527,
"calibration/batch_distribution_entropy": 0.9697549633287637,
"calibration/buffer_distribution_entropy": 0.9895672644051153,
"calibration/confidence_entropy": 0.456708909674265,
"calibration/coverage@0%": 0.03845063369159059,
"calibration/coverage@1%": 0.06080357486806117,
"calibration/coverage@10%": 0.19964626309757696,
"calibration/coverage@15%": 0.24906825125678952,
"calibration/coverage@20%": 0.2925871476414731,
"calibration/coverage@25%": 0.3627023352931546,
"calibration/coverage@30%": 0.4953006002253554,
"calibration/coverage@5%": 0.14285450132901883,
"calibration/ece": 0.17465116802533398,
"calibration/mean_confidence": 0.5012703057618537,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.001953125,
"completions/max_length": 1274.4,
"completions/max_terminated_length": 1274.4,
"completions/mean_length": 283.65693359375,
"completions/mean_terminated_length": 284.2161376953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 107.0,
"epoch": 0.672,
"grad_norm": 0.007196416612714529,
"learning_rate": 1e-06,
"loss": -0.0065,
"num_tokens": 727246627.0,
"reward": 0.9564376831054687,
"reward_std": 0.08635174334049225,
"rewards/accuracy_reward": 0.52783203125,
"rewards/batch_coverage_0": 0.43465115427970885,
"rewards/batch_coverage_1": 0.43465115427970885,
"rewards/batch_coverage_10": 0.4558675825595856,
"rewards/batch_coverage_15": 0.4573832511901855,
"rewards/batch_coverage_20": 0.4624047577381134,
"rewards/batch_coverage_25": 0.46600645780563354,
"rewards/batch_coverage_5": 0.44994105100631715,
"rewards/brier_reward": 0.8067228317260742,
"rewards/confidence_uniqueness_reward": 0.9471780180931091,
"rewards/format_reward": 0.998046875,
"rewards/frontier_entropy_batch_reward": -0.27092793583869934,
"signal/accuracy_reward/centered_abs_mean": 0.092340087890625,
"signal/accuracy_reward/group_std_mean": 0.11898635029792785,
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8868414759635925,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0461700439453125,
"signal/advantage_abs_mean": 0.7663137316703796,
"signal/advantage_pre_scale_abs_mean": 0.06598134487867355,
"signal/advantage_pre_scale_std": 0.10942787975072861,
"signal/advantage_std": 0.9825973153114319,
"signal/batch_coverage_0/centered_abs_mean": 0.14729402363300323,
"signal/batch_coverage_0/group_std_mean": 0.18762222528457642,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0407107375562191,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.00210630449000746,
"signal/batch_coverage_1/centered_abs_mean": 0.14729402363300323,
"signal/batch_coverage_1/group_std_mean": 0.18762222528457642,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0407107375562191,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.00210630449000746,
"signal/batch_coverage_10/centered_abs_mean": 0.152842777967453,
"signal/batch_coverage_10/group_std_mean": 0.19522206783294677,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.042253092676401136,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021856517065316438,
"signal/batch_coverage_15/centered_abs_mean": 0.15091572403907777,
"signal/batch_coverage_15/group_std_mean": 0.19295818507671356,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04171362891793251,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002158094779588282,
"signal/batch_coverage_20/centered_abs_mean": 0.14871154427528382,
"signal/batch_coverage_20/group_std_mean": 0.190624138712883,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04108003005385399,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021265751449391244,
"signal/batch_coverage_25/centered_abs_mean": 0.15191508829593658,
"signal/batch_coverage_25/group_std_mean": 0.19481480419635772,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04194371327757836,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002172385808080435,
"signal/batch_coverage_5/centered_abs_mean": 0.15180889368057252,
"signal/batch_coverage_5/group_std_mean": 0.19379131197929383,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.041916343942284585,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002170867146924138,
"signal/brier_reward/centered_abs_mean": 0.10807926654815674,
"signal/brier_reward/group_std_mean": 0.1404752403497696,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.208378604054451,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010807927139103413,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.017506714165210723,
"signal/confidence_uniqueness_reward/group_std_mean": 0.026788324862718583,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.033572429046034816,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001750671467743814,
"signal/format_reward/centered_abs_mean": 0.00374755859375,
"signal/format_reward/group_std_mean": 0.010039618238806725,
"signal/format_reward/group_zero_std_frac": 0.946875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.03476648181676865,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001873779296875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.295850133895874,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3710406005382538,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5690179944038392,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029585013911128045,
"step": 210
},
{
"calibration/aurc": 0.27406756960213874,
"calibration/batch_distribution_entropy": 0.9594633878497765,
"calibration/buffer_distribution_entropy": 0.9895704882282012,
"calibration/confidence_entropy": 0.48489940278812715,
"calibration/coverage@0%": 0.006256115459882583,
"calibration/coverage@1%": 0.006256115459882583,
"calibration/coverage@10%": 0.21693447284735812,
"calibration/coverage@15%": 0.36089163405088065,
"calibration/coverage@20%": 0.46685420743639916,
"calibration/coverage@25%": 0.5340982754403131,
"calibration/coverage@30%": 0.6794879831213307,
"calibration/coverage@5%": 0.05547486545988258,
"calibration/ece": 0.13974260050395604,
"calibration/mean_confidence": 0.5057844655791297,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0013671875,
"completions/max_length": 1141.2,
"completions/max_terminated_length": 1141.2,
"completions/mean_length": 268.87275390625,
"completions/mean_terminated_length": 269.24306030273436,
"completions/min_length": 0.0,
"completions/min_terminated_length": 109.8,
"epoch": 0.688,
"grad_norm": 0.006476765964180231,
"learning_rate": 1e-06,
"loss": -0.0067,
"num_tokens": 744953804.0,
"reward": 0.9682312369346618,
"reward_std": 0.08881005644798279,
"rewards/accuracy_reward": 0.54833984375,
"rewards/batch_coverage_0": 0.39735434055328367,
"rewards/batch_coverage_1": 0.39735434055328367,
"rewards/batch_coverage_10": 0.4274145483970642,
"rewards/batch_coverage_15": 0.43324387073516846,
"rewards/batch_coverage_20": 0.4401055335998535,
"rewards/batch_coverage_25": 0.44292500615119934,
"rewards/batch_coverage_5": 0.41305392384529116,
"rewards/brier_reward": 0.8091730356216431,
"rewards/confidence_uniqueness_reward": 0.9507735729217529,
"rewards/format_reward": 0.9986328125,
"rewards/frontier_entropy_batch_reward": -0.23455523550510407,
"signal/accuracy_reward/centered_abs_mean": 0.090570068359375,
"signal/accuracy_reward/group_std_mean": 0.12094291895627976,
"signal/accuracy_reward/group_zero_std_frac": 0.65,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8614253044128418,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0452850341796875,
"signal/advantage_abs_mean": 0.7606715440750123,
"signal/advantage_pre_scale_abs_mean": 0.06680728197097778,
"signal/advantage_pre_scale_std": 0.11225859969854354,
"signal/advantage_std": 0.9826042532920838,
"signal/batch_coverage_0/centered_abs_mean": 0.12686864733695985,
"signal/batch_coverage_0/group_std_mean": 0.1620851367712021,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03495507128536701,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018142216606065631,
"signal/batch_coverage_1/centered_abs_mean": 0.12686864733695985,
"signal/batch_coverage_1/group_std_mean": 0.1620851367712021,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03495507128536701,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018142216606065631,
"signal/batch_coverage_10/centered_abs_mean": 0.13474982529878615,
"signal/batch_coverage_10/group_std_mean": 0.17381620705127715,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.037191484868526456,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0019269224954769014,
"signal/batch_coverage_15/centered_abs_mean": 0.1368393063545227,
"signal/batch_coverage_15/group_std_mean": 0.17675977051258088,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03781359381973744,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.001956802140921354,
"signal/batch_coverage_20/centered_abs_mean": 0.13707393407821655,
"signal/batch_coverage_20/group_std_mean": 0.17823283076286317,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.037855605408549306,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0019601572304964064,
"signal/batch_coverage_25/centered_abs_mean": 0.1342383250594139,
"signal/batch_coverage_25/group_std_mean": 0.17535412609577178,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03705654367804527,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.001919607981108129,
"signal/batch_coverage_5/centered_abs_mean": 0.13101311922073364,
"signal/batch_coverage_5/group_std_mean": 0.1678726315498352,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.036108778417110445,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0018734877230599523,
"signal/brier_reward/centered_abs_mean": 0.09857234209775925,
"signal/brier_reward/group_std_mean": 0.12943488359451294,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18886724412441253,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009857234545052052,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015221798233687878,
"signal/confidence_uniqueness_reward/group_std_mean": 0.023016730323433876,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029279665648937227,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001522179855965078,
"signal/format_reward/centered_abs_mean": 0.00264892578125,
"signal/format_reward/group_std_mean": 0.0077339802403002976,
"signal/format_reward/group_zero_std_frac": 0.95625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.02610982470214367,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001324462890625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28410218358039857,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.355901825428009,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5433173775672913,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02841021753847599,
"step": 215
},
{
"calibration/aurc": 0.19996440446534028,
"calibration/batch_distribution_entropy": 0.9567731892139981,
"calibration/buffer_distribution_entropy": 0.9900235492880984,
"calibration/confidence_entropy": 0.4509047069588788,
"calibration/coverage@0%": 0.02782164920762826,
"calibration/coverage@1%": 0.02782164920762826,
"calibration/coverage@10%": 0.30690071790702583,
"calibration/coverage@15%": 0.422679509731975,
"calibration/coverage@20%": 0.5450637356826291,
"calibration/coverage@25%": 0.6725818932024864,
"calibration/coverage@30%": 0.7527977329630483,
"calibration/coverage@5%": 0.19183424225854726,
"calibration/ece": 0.08909807422326481,
"calibration/mean_confidence": 0.536424021286505,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00107421875,
"completions/max_length": 1099.4,
"completions/max_terminated_length": 1099.4,
"completions/mean_length": 258.80654296875,
"completions/mean_terminated_length": 259.0838287353516,
"completions/min_length": 0.0,
"completions/min_terminated_length": 112.8,
"epoch": 0.704,
"grad_norm": 0.005805605091154575,
"learning_rate": 1e-06,
"loss": -0.0065,
"num_tokens": 762470127.0,
"reward": 0.9764017224311828,
"reward_std": 0.08271686583757401,
"rewards/accuracy_reward": 0.55458984375,
"rewards/batch_coverage_0": 0.4308918535709381,
"rewards/batch_coverage_1": 0.4308918535709381,
"rewards/batch_coverage_10": 0.4526013910770416,
"rewards/batch_coverage_15": 0.4595561683177948,
"rewards/batch_coverage_20": 0.466141951084137,
"rewards/batch_coverage_25": 0.46850630044937136,
"rewards/batch_coverage_5": 0.44223862886428833,
"rewards/brier_reward": 0.8262721180915833,
"rewards/confidence_uniqueness_reward": 0.950434684753418,
"rewards/format_reward": 0.99873046875,
"rewards/frontier_entropy_batch_reward": -0.22985949814319612,
"signal/accuracy_reward/centered_abs_mean": 0.080413818359375,
"signal/accuracy_reward/group_std_mean": 0.10305112153291703,
"signal/accuracy_reward/group_zero_std_frac": 0.71875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7740343332290649,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0402069091796875,
"signal/advantage_abs_mean": 0.7796499013900757,
"signal/advantage_pre_scale_abs_mean": 0.06386686488986015,
"signal/advantage_pre_scale_std": 0.10629049390554428,
"signal/advantage_std": 0.9825819253921508,
"signal/batch_coverage_0/centered_abs_mean": 0.13991228342056275,
"signal/batch_coverage_0/group_std_mean": 0.17689195573329924,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03878602460026741,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0020007457584142684,
"signal/batch_coverage_1/centered_abs_mean": 0.13991228342056275,
"signal/batch_coverage_1/group_std_mean": 0.17689195573329924,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03878602460026741,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0020007457584142684,
"signal/batch_coverage_10/centered_abs_mean": 0.14665709733963012,
"signal/batch_coverage_10/group_std_mean": 0.18625611662864686,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.040611236542463305,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020971963647753,
"signal/batch_coverage_15/centered_abs_mean": 0.14724206030368805,
"signal/batch_coverage_15/group_std_mean": 0.18765614330768585,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.040764973312616345,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0021055614575743674,
"signal/batch_coverage_20/centered_abs_mean": 0.1504174143075943,
"signal/batch_coverage_20/group_std_mean": 0.19247474074363707,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04158801585435867,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021509690210223197,
"signal/batch_coverage_25/centered_abs_mean": 0.15163839161396026,
"signal/batch_coverage_25/group_std_mean": 0.1941525250673294,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04197103381156921,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002168428944423795,
"signal/batch_coverage_5/centered_abs_mean": 0.1431538850069046,
"signal/batch_coverage_5/group_std_mean": 0.18127716183662415,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.039674467593431476,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020471004536375403,
"signal/brier_reward/centered_abs_mean": 0.09896044880151748,
"signal/brier_reward/group_std_mean": 0.1299522638320923,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19169933199882508,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009896045178174972,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014785858243703843,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02252316027879715,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028771713748574256,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014785858802497387,
"signal/format_reward/centered_abs_mean": 0.002459716796875,
"signal/format_reward/group_std_mean": 0.007181553030386567,
"signal/format_reward/group_zero_std_frac": 0.959375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.023820652440190316,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0012298583984375,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28561203479766845,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35598132014274597,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5556212723255157,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02856120392680168,
"step": 220
},
{
"calibration/aurc": 0.2123510723959514,
"calibration/batch_distribution_entropy": 0.9698352011806625,
"calibration/buffer_distribution_entropy": 0.9893924504398015,
"calibration/confidence_entropy": 0.4456485289692901,
"calibration/coverage@0%": 0.08373927396300987,
"calibration/coverage@1%": 0.14006280337477456,
"calibration/coverage@10%": 0.2483180117081079,
"calibration/coverage@15%": 0.28428921268850005,
"calibration/coverage@20%": 0.5425642662887074,
"calibration/coverage@25%": 0.6289766227552664,
"calibration/coverage@30%": 0.7533506844518629,
"calibration/coverage@5%": 0.19556372249242163,
"calibration/ece": 0.14619324149307394,
"calibration/mean_confidence": 0.49175718656707834,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0009765625,
"completions/max_length": 1078.4,
"completions/max_terminated_length": 1078.4,
"completions/mean_length": 265.30068359375,
"completions/mean_terminated_length": 265.55845336914064,
"completions/min_length": 0.0,
"completions/min_terminated_length": 113.0,
"epoch": 0.72,
"grad_norm": 0.006270843092352152,
"learning_rate": 1e-06,
"loss": -0.0032,
"num_tokens": 780196662.0,
"reward": 0.9827430009841919,
"reward_std": 0.08292621672153473,
"rewards/accuracy_reward": 0.5748046875,
"rewards/batch_coverage_0": 0.4352779507637024,
"rewards/batch_coverage_1": 0.4352779507637024,
"rewards/batch_coverage_10": 0.45996217131614686,
"rewards/batch_coverage_15": 0.4620618462562561,
"rewards/batch_coverage_20": 0.46976361274719236,
"rewards/batch_coverage_25": 0.47316410541534426,
"rewards/batch_coverage_5": 0.45016440749168396,
"rewards/brier_reward": 0.8190774083137512,
"rewards/confidence_uniqueness_reward": 0.9467625141143798,
"rewards/format_reward": 0.9990234375,
"rewards/frontier_entropy_batch_reward": -0.26310152411460874,
"signal/accuracy_reward/centered_abs_mean": 0.0859619140625,
"signal/accuracy_reward/group_std_mean": 0.11375608295202255,
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8471159696578979,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04298095703125,
"signal/advantage_abs_mean": 0.7652387022972107,
"signal/advantage_pre_scale_abs_mean": 0.06312915831804275,
"signal/advantage_pre_scale_std": 0.10508692562580109,
"signal/advantage_std": 0.9825536012649536,
"signal/batch_coverage_0/centered_abs_mean": 0.13881113529205322,
"signal/batch_coverage_0/group_std_mean": 0.17654796242713927,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.039122577756643295,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019849991658702493,
"signal/batch_coverage_1/centered_abs_mean": 0.13881113529205322,
"signal/batch_coverage_1/group_std_mean": 0.17654796242713927,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.039122577756643295,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019849991658702493,
"signal/batch_coverage_10/centered_abs_mean": 0.14516532719135283,
"signal/batch_coverage_10/group_std_mean": 0.18465124368667601,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04085480272769928,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.002075864188373089,
"signal/batch_coverage_15/centered_abs_mean": 0.14553692936897278,
"signal/batch_coverage_15/group_std_mean": 0.18506303429603577,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04095983579754829,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020811779890209437,
"signal/batch_coverage_20/centered_abs_mean": 0.15031401813030243,
"signal/batch_coverage_20/group_std_mean": 0.19162946045398713,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04231066554784775,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.00214949045330286,
"signal/batch_coverage_25/centered_abs_mean": 0.15131179988384247,
"signal/batch_coverage_25/group_std_mean": 0.1932460606098175,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04257652685046196,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021637586876749994,
"signal/batch_coverage_5/centered_abs_mean": 0.14311102628707886,
"signal/batch_coverage_5/group_std_mean": 0.18209123611450195,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04032421484589577,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002046487620100379,
"signal/brier_reward/centered_abs_mean": 0.1019532933831215,
"signal/brier_reward/group_std_mean": 0.1325368106365204,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2007545202970505,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010195329040288924,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.016168445721268652,
"signal/confidence_uniqueness_reward/group_std_mean": 0.023263829201459883,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03172456994652748,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016168446512892843,
"signal/format_reward/centered_abs_mean": 0.00189208984375,
"signal/format_reward/group_std_mean": 0.00552427158690989,
"signal/format_reward/group_zero_std_frac": 0.96875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01848184745758772,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2896900773048401,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36198447942733764,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5700592041015625,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02896900735795498,
"step": 225
},
{
"calibration/aurc": 0.19987491369889768,
"calibration/batch_distribution_entropy": 0.9682028960945696,
"calibration/buffer_distribution_entropy": 0.9892767039546803,
"calibration/confidence_entropy": 0.4514352722551944,
"calibration/coverage@0%": 0.033621269569471625,
"calibration/coverage@1%": 0.033621269569471625,
"calibration/coverage@10%": 0.19240918542074364,
"calibration/coverage@15%": 0.43307546477495096,
"calibration/coverage@20%": 0.5825441841976516,
"calibration/coverage@25%": 0.7206694899706457,
"calibration/coverage@30%": 0.8360804488747553,
"calibration/coverage@5%": 0.07077574608610568,
"calibration/ece": 0.1259782288823052,
"calibration/mean_confidence": 0.5549969590745947,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00126953125,
"completions/max_length": 987.0,
"completions/max_terminated_length": 987.0,
"completions/mean_length": 279.36240234375,
"completions/mean_terminated_length": 279.71857299804685,
"completions/min_length": 0.0,
"completions/min_terminated_length": 117.2,
"epoch": 0.736,
"grad_norm": 0.006227577105164528,
"learning_rate": 1e-06,
"loss": -0.0031,
"num_tokens": 797996917.0,
"reward": 0.990349292755127,
"reward_std": 0.07996446937322617,
"rewards/accuracy_reward": 0.584765625,
"rewards/batch_coverage_0": 0.4305130660533905,
"rewards/batch_coverage_1": 0.4305130660533905,
"rewards/batch_coverage_10": 0.461780971288681,
"rewards/batch_coverage_15": 0.4661433219909668,
"rewards/batch_coverage_20": 0.47316175103187563,
"rewards/batch_coverage_25": 0.4773706912994385,
"rewards/batch_coverage_5": 0.4513450086116791,
"rewards/brier_reward": 0.8201140403747559,
"rewards/confidence_uniqueness_reward": 0.9489932417869568,
"rewards/format_reward": 0.99873046875,
"rewards/frontier_entropy_batch_reward": -0.23938325345516204,
"signal/accuracy_reward/centered_abs_mean": 0.0771484375,
"signal/accuracy_reward/group_std_mean": 0.1027161180973053,
"signal/accuracy_reward/group_zero_std_frac": 0.703125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7959671258926392,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03857421875,
"signal/advantage_abs_mean": 0.7633894324302674,
"signal/advantage_pre_scale_abs_mean": 0.060272646695375444,
"signal/advantage_pre_scale_std": 0.10310345590114593,
"signal/advantage_std": 0.9824482798576355,
"signal/batch_coverage_0/centered_abs_mean": 0.12894001603126526,
"signal/batch_coverage_0/group_std_mean": 0.16431694328784943,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0384144626557827,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018438422121107578,
"signal/batch_coverage_1/centered_abs_mean": 0.12894001603126526,
"signal/batch_coverage_1/group_std_mean": 0.16431694328784943,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0384144626557827,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018438422121107578,
"signal/batch_coverage_10/centered_abs_mean": 0.13949068188667296,
"signal/batch_coverage_10/group_std_mean": 0.17866944074630736,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.041481245309114456,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0019947167951613665,
"signal/batch_coverage_15/centered_abs_mean": 0.14124009311199187,
"signal/batch_coverage_15/group_std_mean": 0.1812742084264755,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04199150204658508,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002019733446650207,
"signal/batch_coverage_20/centered_abs_mean": 0.1427465260028839,
"signal/batch_coverage_20/group_std_mean": 0.18400496244430542,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04241336733102798,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020412752870470285,
"signal/batch_coverage_25/centered_abs_mean": 0.1435283601284027,
"signal/batch_coverage_25/group_std_mean": 0.18493232727050782,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04260774925351143,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002052455488592386,
"signal/batch_coverage_5/centered_abs_mean": 0.1360446184873581,
"signal/batch_coverage_5/group_std_mean": 0.17352403700351715,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.040481310337781906,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0019454380497336387,
"signal/brier_reward/centered_abs_mean": 0.09750867635011673,
"signal/brier_reward/group_std_mean": 0.12824745923280717,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20277776420116425,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009750867821276188,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015282927080988884,
"signal/confidence_uniqueness_reward/group_std_mean": 0.022277648746967315,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03166734613478184,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015282927313819527,
"signal/format_reward/centered_abs_mean": 0.002423095703125,
"signal/format_reward/group_std_mean": 0.006449723429977894,
"signal/format_reward/group_zero_std_frac": 0.965625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.025392506457865237,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0012115478515625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2773844122886658,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3488776504993439,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5746127367019653,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02773844264447689,
"step": 230
},
{
"calibration/aurc": 0.23799171771509514,
"calibration/batch_distribution_entropy": 0.980168539402238,
"calibration/buffer_distribution_entropy": 0.9898585157062338,
"calibration/confidence_entropy": 0.4811932019738524,
"calibration/coverage@0%": 0.0625091911764706,
"calibration/coverage@1%": 0.0675873161764706,
"calibration/coverage@10%": 0.2684972426470588,
"calibration/coverage@15%": 0.36816942401960784,
"calibration/coverage@20%": 0.4830346200980392,
"calibration/coverage@25%": 0.5702052696078431,
"calibration/coverage@30%": 0.6597074142156862,
"calibration/coverage@5%": 0.14571231617647057,
"calibration/ece": 0.10425353764749187,
"calibration/mean_confidence": 0.4810001862033529,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.001171875,
"completions/max_length": 926.4,
"completions/max_terminated_length": 926.4,
"completions/mean_length": 269.08896484375,
"completions/mean_terminated_length": 269.4028564453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 122.8,
"epoch": 0.752,
"grad_norm": 0.005794130731374025,
"learning_rate": 1e-06,
"loss": -0.0015,
"num_tokens": 815979588.0,
"reward": 0.9808092594146729,
"reward_std": 0.08298141807317734,
"rewards/accuracy_reward": 0.5640625,
"rewards/batch_coverage_0": 0.4113637089729309,
"rewards/batch_coverage_1": 0.4113637089729309,
"rewards/batch_coverage_10": 0.44650899767875674,
"rewards/batch_coverage_15": 0.45241751074790953,
"rewards/batch_coverage_20": 0.4543795883655548,
"rewards/batch_coverage_25": 0.4559151649475098,
"rewards/batch_coverage_5": 0.4355119466781616,
"rewards/brier_reward": 0.8173823356628418,
"rewards/confidence_uniqueness_reward": 0.950950539112091,
"rewards/format_reward": 0.998828125,
"rewards/frontier_entropy_batch_reward": -0.21333999633789064,
"signal/accuracy_reward/centered_abs_mean": 0.0805908203125,
"signal/accuracy_reward/group_std_mean": 0.10987765192985535,
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7611582159996033,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04029541015625,
"signal/advantage_abs_mean": 0.7552126884460449,
"signal/advantage_pre_scale_abs_mean": 0.061991773545742035,
"signal/advantage_pre_scale_std": 0.10449737906455994,
"signal/advantage_std": 0.982585608959198,
"signal/batch_coverage_0/centered_abs_mean": 0.13574532866477967,
"signal/batch_coverage_0/group_std_mean": 0.1734715759754181,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03727240189909935,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019411582965403796,
"signal/batch_coverage_1/centered_abs_mean": 0.13574532866477967,
"signal/batch_coverage_1/group_std_mean": 0.1734715759754181,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03727240189909935,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019411582965403796,
"signal/batch_coverage_10/centered_abs_mean": 0.14469059854745864,
"signal/batch_coverage_10/group_std_mean": 0.186430162191391,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03972938433289528,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.00206907547544688,
"signal/batch_coverage_15/centered_abs_mean": 0.14610361456871032,
"signal/batch_coverage_15/group_std_mean": 0.18892171382904052,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.040112358331680295,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020892817061394452,
"signal/batch_coverage_20/centered_abs_mean": 0.14735167622566223,
"signal/batch_coverage_20/group_std_mean": 0.19071140885353088,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04044778645038605,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.002107128966599703,
"signal/batch_coverage_25/centered_abs_mean": 0.1486743688583374,
"signal/batch_coverage_25/group_std_mean": 0.19229290783405303,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04084197878837585,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002126043476164341,
"signal/batch_coverage_5/centered_abs_mean": 0.14109062254428864,
"signal/batch_coverage_5/group_std_mean": 0.1808608740568161,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03873511925339699,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020175960380584,
"signal/brier_reward/centered_abs_mean": 0.09518031924962997,
"signal/brier_reward/group_std_mean": 0.12545545548200607,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18371520936489105,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009518032148480415,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013876733742654324,
"signal/confidence_uniqueness_reward/group_std_mean": 0.020745597779750824,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026762987300753594,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013876733602955938,
"signal/format_reward/centered_abs_mean": 0.00225830078125,
"signal/format_reward/group_std_mean": 0.0062928175088018175,
"signal/format_reward/group_zero_std_frac": 0.965625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.021861393004655838,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001129150390625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2675036698579788,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.341668963432312,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5147171020507812,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026750367507338522,
"step": 235
},
{
"calibration/aurc": 0.24226126920454516,
"calibration/batch_distribution_entropy": 0.9618339604859422,
"calibration/buffer_distribution_entropy": 0.990064646374362,
"calibration/confidence_entropy": 0.4670138911337166,
"calibration/coverage@0%": 0.09453125,
"calibration/coverage@1%": 0.148046875,
"calibration/coverage@10%": 0.29375,
"calibration/coverage@15%": 0.328125,
"calibration/coverage@20%": 0.46796875,
"calibration/coverage@25%": 0.547265625,
"calibration/coverage@30%": 0.644140625,
"calibration/coverage@5%": 0.244921875,
"calibration/ece": 0.13594110362016193,
"calibration/mean_confidence": 0.5271706740493869,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1110.0,
"completions/max_terminated_length": 1110.0,
"completions/mean_length": 265.80791015625,
"completions/mean_terminated_length": 265.9118988037109,
"completions/min_length": 23.6,
"completions/min_terminated_length": 119.2,
"epoch": 0.768,
"grad_norm": 0.006786343641579151,
"learning_rate": 1e-06,
"loss": -0.0046,
"num_tokens": 833634165.0,
"reward": 0.9572334289550781,
"reward_std": 0.07809762060642242,
"rewards/accuracy_reward": 0.5166015625,
"rewards/batch_coverage_0": 0.4477431833744049,
"rewards/batch_coverage_1": 0.4477431833744049,
"rewards/batch_coverage_10": 0.47420247197151183,
"rewards/batch_coverage_15": 0.47486208081245423,
"rewards/batch_coverage_20": 0.4812659859657288,
"rewards/batch_coverage_25": 0.481998997926712,
"rewards/batch_coverage_5": 0.4603855133056641,
"rewards/brier_reward": 0.8310378670692444,
"rewards/confidence_uniqueness_reward": 0.9483831882476806,
"rewards/format_reward": 0.999609375,
"rewards/frontier_entropy_batch_reward": -0.2554940521717072,
"signal/accuracy_reward/centered_abs_mean": 0.074072265625,
"signal/accuracy_reward/group_std_mean": 0.09812594801187516,
"signal/accuracy_reward/group_zero_std_frac": 0.721875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7703619718551635,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0370361328125,
"signal/advantage_abs_mean": 0.7758382797241211,
"signal/advantage_pre_scale_abs_mean": 0.060599684715270996,
"signal/advantage_pre_scale_std": 0.10211093574762345,
"signal/advantage_std": 0.9824242711067199,
"signal/batch_coverage_0/centered_abs_mean": 0.13136267066001892,
"signal/batch_coverage_0/group_std_mean": 0.16766657829284667,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03942425549030304,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018784862011671066,
"signal/batch_coverage_1/centered_abs_mean": 0.13136267066001892,
"signal/batch_coverage_1/group_std_mean": 0.16766657829284667,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03942425549030304,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018784862011671066,
"signal/batch_coverage_10/centered_abs_mean": 0.13855439722537993,
"signal/batch_coverage_10/group_std_mean": 0.17847085297107695,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.041663312911987306,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0019813278689980508,
"signal/batch_coverage_15/centered_abs_mean": 0.13833429515361786,
"signal/batch_coverage_15/group_std_mean": 0.17840952575206756,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04161200672388077,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0019781803712248802,
"signal/batch_coverage_20/centered_abs_mean": 0.14173888862133027,
"signal/batch_coverage_20/group_std_mean": 0.18329765796661376,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04259318187832832,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020268660970032214,
"signal/batch_coverage_25/centered_abs_mean": 0.14233888685703278,
"signal/batch_coverage_25/group_std_mean": 0.18399560451507568,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04274929314851761,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020354459527879955,
"signal/batch_coverage_5/centered_abs_mean": 0.13465096652507783,
"signal/batch_coverage_5/group_std_mean": 0.1721017152070999,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.040450369566679,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.001925508794374764,
"signal/brier_reward/centered_abs_mean": 0.0896749809384346,
"signal/brier_reward/group_std_mean": 0.11739355921745301,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18861537277698517,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.008967497944831848,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014798154309391975,
"signal/confidence_uniqueness_reward/group_std_mean": 0.019825227186083795,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.031269267573952673,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014798154355958105,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007893532142043113,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2812827706336975,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3491488456726074,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5921412825584411,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028128278255462647,
"step": 240
},
{
"calibration/aurc": 0.24932977897988673,
"calibration/batch_distribution_entropy": 0.9554029734603953,
"calibration/buffer_distribution_entropy": 0.9895763593851804,
"calibration/confidence_entropy": 0.42567792957542994,
"calibration/coverage@0%": 0.09453201443248532,
"calibration/coverage@1%": 0.10937576443248531,
"calibration/coverage@10%": 0.3043740826810176,
"calibration/coverage@15%": 0.3973940496575342,
"calibration/coverage@20%": 0.4798426797945206,
"calibration/coverage@25%": 0.5415835983365949,
"calibration/coverage@30%": 0.5865322284735812,
"calibration/coverage@5%": 0.21290973581213307,
"calibration/ece": 0.13702020641332538,
"calibration/mean_confidence": 0.5154663940828144,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 852.4,
"completions/max_terminated_length": 852.4,
"completions/mean_length": 260.136328125,
"completions/mean_terminated_length": 260.23645935058596,
"completions/min_length": 71.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.784,
"grad_norm": 0.006491308566182852,
"learning_rate": 1e-06,
"loss": 0.0036,
"num_tokens": 851472329.0,
"reward": 0.9818279027938843,
"reward_std": 0.07545744925737381,
"rewards/accuracy_reward": 0.57529296875,
"rewards/batch_coverage_0": 0.43470929861068724,
"rewards/batch_coverage_1": 0.43470929861068724,
"rewards/batch_coverage_10": 0.44828411340713503,
"rewards/batch_coverage_15": 0.4518063485622406,
"rewards/batch_coverage_20": 0.4546870529651642,
"rewards/batch_coverage_25": 0.4580719113349915,
"rewards/batch_coverage_5": 0.44084044694900515,
"rewards/brier_reward": 0.7951297760009766,
"rewards/confidence_uniqueness_reward": 0.9488907814025879,
"rewards/format_reward": 0.999609375,
"rewards/frontier_entropy_batch_reward": -0.24685774147510528,
"signal/accuracy_reward/centered_abs_mean": 0.076690673828125,
"signal/accuracy_reward/group_std_mean": 0.09880765974521637,
"signal/accuracy_reward/group_zero_std_frac": 0.728125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7718483328819274,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0383453369140625,
"signal/advantage_abs_mean": 0.7896058082580566,
"signal/advantage_pre_scale_abs_mean": 0.05918871089816093,
"signal/advantage_pre_scale_std": 0.09708251059055328,
"signal/advantage_std": 0.9824390530586242,
"signal/batch_coverage_0/centered_abs_mean": 0.14222476333379747,
"signal/batch_coverage_0/group_std_mean": 0.17862839102745057,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04163095131516457,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0020338141359388827,
"signal/batch_coverage_1/centered_abs_mean": 0.14222476333379747,
"signal/batch_coverage_1/group_std_mean": 0.17862839102745057,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04163095131516457,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0020338141359388827,
"signal/batch_coverage_10/centered_abs_mean": 0.14671584963798523,
"signal/batch_coverage_10/group_std_mean": 0.1848911762237549,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.042957622557878494,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.002098036650568247,
"signal/batch_coverage_15/centered_abs_mean": 0.1478523552417755,
"signal/batch_coverage_15/group_std_mean": 0.18667379319667815,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.043281296640634535,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0021142886485904454,
"signal/batch_coverage_20/centered_abs_mean": 0.14663981795310974,
"signal/batch_coverage_20/group_std_mean": 0.1858953207731247,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04296448454260826,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020969494245946406,
"signal/batch_coverage_25/centered_abs_mean": 0.14821869730949402,
"signal/batch_coverage_25/group_std_mean": 0.1883733570575714,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04351719543337822,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021195273380726575,
"signal/batch_coverage_5/centered_abs_mean": 0.14388411343097687,
"signal/batch_coverage_5/group_std_mean": 0.1807178020477295,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04210501462221146,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002057542884722352,
"signal/brier_reward/centered_abs_mean": 0.10256388038396835,
"signal/brier_reward/group_std_mean": 0.13275916874408722,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2102281779050827,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01025638859719038,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01494374144822359,
"signal/confidence_uniqueness_reward/group_std_mean": 0.019778795540332794,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03137281015515327,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014943741960451006,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008183719962835312,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29007603526115416,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36126854419708254,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6033417701721191,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029007603973150255,
"step": 245
},
{
"calibration/aurc": 0.16903278555018358,
"calibration/batch_distribution_entropy": 0.957089997582391,
"calibration/buffer_distribution_entropy": 0.989071790702158,
"calibration/confidence_entropy": 0.4465539348579215,
"calibration/coverage@0%": 0.11685879403131114,
"calibration/coverage@1%": 0.15361958781800392,
"calibration/coverage@10%": 0.4617462695694717,
"calibration/coverage@15%": 0.5513117661448141,
"calibration/coverage@20%": 0.6224712573385519,
"calibration/coverage@25%": 0.6889233732876712,
"calibration/coverage@30%": 0.7768644508317025,
"calibration/coverage@5%": 0.3037181996086106,
"calibration/ece": 0.11512091478718908,
"calibration/mean_confidence": 0.4906129472813886,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 875.6,
"completions/max_terminated_length": 875.6,
"completions/mean_length": 248.35615234375,
"completions/mean_terminated_length": 248.42735900878907,
"completions/min_length": 73.0,
"completions/min_terminated_length": 121.8,
"epoch": 0.8,
"grad_norm": 0.0064397272653877735,
"learning_rate": 1e-06,
"loss": 0.0017,
"num_tokens": 869026056.0,
"reward": 0.9991943836212158,
"reward_std": 0.0748121589422226,
"rewards/accuracy_reward": 0.6056640625,
"rewards/batch_coverage_0": 0.44797525405883787,
"rewards/batch_coverage_1": 0.44797525405883787,
"rewards/batch_coverage_10": 0.47836520075798034,
"rewards/batch_coverage_15": 0.48425130248069764,
"rewards/batch_coverage_20": 0.4894311368465424,
"rewards/batch_coverage_25": 0.4941446602344513,
"rewards/batch_coverage_5": 0.466780948638916,
"rewards/brier_reward": 0.8302229762077331,
"rewards/confidence_uniqueness_reward": 0.9474293708801269,
"rewards/format_reward": 0.999609375,
"rewards/frontier_entropy_batch_reward": -0.2852515548467636,
"signal/accuracy_reward/centered_abs_mean": 0.0673095703125,
"signal/accuracy_reward/group_std_mean": 0.09558814465999603,
"signal/accuracy_reward/group_zero_std_frac": 0.70625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7255583643913269,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03365478515625,
"signal/advantage_abs_mean": 0.7604370594024659,
"signal/advantage_pre_scale_abs_mean": 0.05594034641981125,
"signal/advantage_pre_scale_std": 0.09674849212169648,
"signal/advantage_std": 0.9823638439178467,
"signal/batch_coverage_0/centered_abs_mean": 0.13314661681652068,
"signal/batch_coverage_0/group_std_mean": 0.17028163969516755,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04120338633656502,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019039965933188795,
"signal/batch_coverage_1/centered_abs_mean": 0.13314661681652068,
"signal/batch_coverage_1/group_std_mean": 0.17028163969516755,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04120338633656502,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019039965933188795,
"signal/batch_coverage_10/centered_abs_mean": 0.14108843207359315,
"signal/batch_coverage_10/group_std_mean": 0.1814536929130554,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.043681205809116365,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020175645360723136,
"signal/batch_coverage_15/centered_abs_mean": 0.14084658324718474,
"signal/batch_coverage_15/group_std_mean": 0.1811590611934662,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04360946193337441,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020141060929745437,
"signal/batch_coverage_20/centered_abs_mean": 0.14108088612556458,
"signal/batch_coverage_20/group_std_mean": 0.18242039680480956,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04367534294724464,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.002017456665635109,
"signal/batch_coverage_25/centered_abs_mean": 0.1456657886505127,
"signal/batch_coverage_25/group_std_mean": 0.18809856474399567,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04508034512400627,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002083020796999335,
"signal/batch_coverage_5/centered_abs_mean": 0.13754327595233917,
"signal/batch_coverage_5/group_std_mean": 0.1764551192522049,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04256564825773239,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.00196686873678118,
"signal/brier_reward/centered_abs_mean": 0.09132596403360367,
"signal/brier_reward/group_std_mean": 0.11983357220888138,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19744545817375184,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009132596850395202,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015021534264087677,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01983652338385582,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03249132037162781,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015021534636616707,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007924291491508483,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2975069582462311,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3681724011898041,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6431950688362121,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0297506969422102,
"step": 250
},
{
"epoch": 0.8,
"eval_calibration/aurc": 0.4056399663475222,
"eval_calibration/batch_distribution_entropy": 0.9463703104773048,
"eval_calibration/buffer_distribution_entropy": 0.9892823149568434,
"eval_calibration/confidence_entropy": 0.4557911057849003,
"eval_calibration/coverage@0%": 0.1171875,
"eval_calibration/coverage@1%": 0.1171875,
"eval_calibration/coverage@10%": 0.15625,
"eval_calibration/coverage@15%": 0.171875,
"eval_calibration/coverage@20%": 0.2265625,
"eval_calibration/coverage@25%": 0.3359375,
"eval_calibration/coverage@30%": 0.375,
"eval_calibration/coverage@5%": 0.1171875,
"eval_calibration/ece": 0.208780844006349,
"eval_calibration/mean_confidence": 0.47532201440066835,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 655.25,
"eval_completions/max_terminated_length": 655.25,
"eval_completions/mean_length": 259.66365814208984,
"eval_completions/mean_terminated_length": 259.66365814208984,
"eval_completions/min_length": 132.25,
"eval_completions/min_terminated_length": 132.25,
"eval_loss": 0.0,
"eval_num_tokens": 869026056.0,
"eval_reward": 0.808761790394783,
"eval_reward_std": 0.23153527826070786,
"eval_rewards/accuracy_reward": 0.4453125,
"eval_rewards/batch_coverage_0": 0.16463664919137955,
"eval_rewards/batch_coverage_1": 0.16463664919137955,
"eval_rewards/batch_coverage_10": 0.151813842356205,
"eval_rewards/batch_coverage_15": 0.15031159296631813,
"eval_rewards/batch_coverage_20": 0.12830344960093498,
"eval_rewards/batch_coverage_25": 0.10411388799548149,
"eval_rewards/batch_coverage_5": 0.16463664919137955,
"eval_rewards/brier_reward": 0.8128635436296463,
"eval_rewards/confidence_uniqueness_reward": 0.901123046875,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 29.3019,
"eval_samples_per_second": 17.064,
"eval_signal/accuracy_reward/centered_abs_mean": 0.473876953125,
"eval_signal/accuracy_reward/group_std_mean": 0.4942095950245857,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0256902873516083,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2369384765625,
"eval_signal/advantage_abs_mean": 0.9391559809446335,
"eval_signal/advantage_pre_scale_abs_mean": 0.21801294013857841,
"eval_signal/advantage_pre_scale_std": 0.22905661538243294,
"eval_signal/advantage_std": 0.9876904189586639,
"eval_signal/batch_coverage_0/centered_abs_mean": 0.28715651854872704,
"eval_signal/batch_coverage_0/group_std_mean": 0.3519328162074089,
"eval_signal/batch_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017829164396971464,
"eval_signal/batch_coverage_0/weight": 0.014299999922513962,
"eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.004106338135898113,
"eval_signal/batch_coverage_1/centered_abs_mean": 0.28715651854872704,
"eval_signal/batch_coverage_1/group_std_mean": 0.3519328162074089,
"eval_signal/batch_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017829164396971464,
"eval_signal/batch_coverage_1/weight": 0.014299999922513962,
"eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.004106338135898113,
"eval_signal/batch_coverage_10/centered_abs_mean": 0.2590860575437546,
"eval_signal/batch_coverage_10/group_std_mean": 0.31707237660884857,
"eval_signal/batch_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01609125966206193,
"eval_signal/batch_coverage_10/weight": 0.014299999922513962,
"eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.0037049305392429233,
"eval_signal/batch_coverage_15/centered_abs_mean": 0.25385092943906784,
"eval_signal/batch_coverage_15/group_std_mean": 0.31071092188358307,
"eval_signal/batch_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.015773584134876728,
"eval_signal/batch_coverage_15/weight": 0.014299999922513962,
"eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.003630068327765912,
"eval_signal/batch_coverage_20/centered_abs_mean": 0.1938122659921646,
"eval_signal/batch_coverage_20/group_std_mean": 0.23961098864674568,
"eval_signal/batch_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012016238179057837,
"eval_signal/batch_coverage_20/weight": 0.014299999922513962,
"eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.0027715153992176056,
"eval_signal/batch_coverage_25/centered_abs_mean": 0.14399352297186852,
"eval_signal/batch_coverage_25/group_std_mean": 0.1793827824294567,
"eval_signal/batch_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.008922932553105056,
"eval_signal/batch_coverage_25/weight": 0.014299999922513962,
"eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020591074135154486,
"eval_signal/batch_coverage_5/centered_abs_mean": 0.28715651854872704,
"eval_signal/batch_coverage_5/group_std_mean": 0.3519328162074089,
"eval_signal/batch_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.017829164396971464,
"eval_signal/batch_coverage_5/weight": 0.014299999922513962,
"eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.004106338135898113,
"eval_signal/brier_reward/centered_abs_mean": 0.18446644395589828,
"eval_signal/brier_reward/group_std_mean": 0.23762714117765427,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08009063266217709,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.018446644535288215,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.037994384765625,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04532748367637396,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.016460294369608164,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037994384765625,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.137,
"step": 250
},
{
"calibration/aurc": 0.19802390820258206,
"calibration/batch_distribution_entropy": 0.9488017382610578,
"calibration/buffer_distribution_entropy": 0.9891109492581016,
"calibration/confidence_entropy": 0.4503818100967645,
"calibration/coverage@0%": 0.037109375,
"calibration/coverage@1%": 0.037109375,
"calibration/coverage@10%": 0.291015625,
"calibration/coverage@15%": 0.38515625,
"calibration/coverage@20%": 0.559375,
"calibration/coverage@25%": 0.65859375,
"calibration/coverage@30%": 0.803125,
"calibration/coverage@5%": 0.184765625,
"calibration/ece": 0.11336487553026367,
"calibration/mean_confidence": 0.5387612728269289,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 979.0,
"completions/max_terminated_length": 979.0,
"completions/mean_length": 246.88466796875,
"completions/mean_terminated_length": 246.9087158203125,
"completions/min_length": 96.6,
"completions/min_terminated_length": 121.4,
"epoch": 0.816,
"grad_norm": 0.005885324906557798,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 886653323.0,
"reward": 0.9899710774421692,
"reward_std": 0.08030715435743332,
"rewards/accuracy_reward": 0.5884765625,
"rewards/batch_coverage_0": 0.4053687870502472,
"rewards/batch_coverage_1": 0.4053687870502472,
"rewards/batch_coverage_10": 0.4392300844192505,
"rewards/batch_coverage_15": 0.44483659863471986,
"rewards/batch_coverage_20": 0.44895498752593993,
"rewards/batch_coverage_25": 0.4509244620800018,
"rewards/batch_coverage_5": 0.424567312002182,
"rewards/brier_reward": 0.8162455439567566,
"rewards/confidence_uniqueness_reward": 0.9505211353302002,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_entropy_batch_reward": -0.24021487236022948,
"signal/accuracy_reward/centered_abs_mean": 0.0829345703125,
"signal/accuracy_reward/group_std_mean": 0.11002247482538223,
"signal/accuracy_reward/group_zero_std_frac": 0.684375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8337624669075012,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04146728515625,
"signal/advantage_abs_mean": 0.7705082893371582,
"signal/advantage_pre_scale_abs_mean": 0.06182959377765655,
"signal/advantage_pre_scale_std": 0.10288000404834748,
"signal/advantage_std": 0.9825070142745972,
"signal/batch_coverage_0/centered_abs_mean": 0.1394294634461403,
"signal/batch_coverage_0/group_std_mean": 0.17551667094230652,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04024726450443268,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.001993841305375099,
"signal/batch_coverage_1/centered_abs_mean": 0.1394294634461403,
"signal/batch_coverage_1/group_std_mean": 0.17551667094230652,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04024726450443268,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.001993841305375099,
"signal/batch_coverage_10/centered_abs_mean": 0.15050265192985535,
"signal/batch_coverage_10/group_std_mean": 0.19143196940422058,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.043409749120473864,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021521878661587834,
"signal/batch_coverage_15/centered_abs_mean": 0.15238051116466522,
"signal/batch_coverage_15/group_std_mean": 0.19435729682445527,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04396386295557022,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0021790412720292807,
"signal/batch_coverage_20/centered_abs_mean": 0.15019435286521912,
"signal/batch_coverage_20/group_std_mean": 0.19183135628700257,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04335668459534645,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.002147779194638133,
"signal/batch_coverage_25/centered_abs_mean": 0.14935584962368012,
"signal/batch_coverage_25/group_std_mean": 0.1913298785686493,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.043079151213169097,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021357886493206025,
"signal/batch_coverage_5/centered_abs_mean": 0.1444351464509964,
"signal/batch_coverage_5/group_std_mean": 0.18239499628543854,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0416886031627655,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020654225954785943,
"signal/brier_reward/centered_abs_mean": 0.09700828045606613,
"signal/brier_reward/group_std_mean": 0.12455925345420837,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19543475210666655,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009700828790664673,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013165067881345749,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01705835647881031,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02658823914825916,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013165067881345748,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0037154631689190866,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28098098635673524,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3505799949169159,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5675266325473786,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028098098933696747,
"step": 255
},
{
"calibration/aurc": 0.25830381510833755,
"calibration/batch_distribution_entropy": 0.9757110148254032,
"calibration/buffer_distribution_entropy": 0.9882824188981054,
"calibration/confidence_entropy": 0.46318351392250356,
"calibration/coverage@0%": 0.04375,
"calibration/coverage@1%": 0.04375,
"calibration/coverage@10%": 0.270703125,
"calibration/coverage@15%": 0.33828125,
"calibration/coverage@20%": 0.412890625,
"calibration/coverage@25%": 0.52109375,
"calibration/coverage@30%": 0.629296875,
"calibration/coverage@5%": 0.2,
"calibration/ece": 0.11027176139179815,
"calibration/mean_confidence": 0.49241658529860094,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 855.8,
"completions/max_terminated_length": 855.8,
"completions/mean_length": 251.2025390625,
"completions/mean_terminated_length": 251.2025390625,
"completions/min_length": 124.6,
"completions/min_terminated_length": 124.6,
"epoch": 0.832,
"grad_norm": 0.006431729532778263,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 904233989.0,
"reward": 0.9797815322875977,
"reward_std": 0.07985545843839645,
"rewards/accuracy_reward": 0.5634765625,
"rewards/batch_coverage_0": 0.43724015951156614,
"rewards/batch_coverage_1": 0.43724015951156614,
"rewards/batch_coverage_10": 0.4717229902744293,
"rewards/batch_coverage_15": 0.4749358117580414,
"rewards/batch_coverage_20": 0.48062952160835265,
"rewards/batch_coverage_25": 0.4813665568828583,
"rewards/batch_coverage_5": 0.45433294773101807,
"rewards/brier_reward": 0.8289217233657837,
"rewards/confidence_uniqueness_reward": 0.9487106323242187,
"rewards/format_reward": 1.0,
"rewards/frontier_entropy_batch_reward": -0.26015793681144717,
"signal/accuracy_reward/centered_abs_mean": 0.083837890625,
"signal/accuracy_reward/group_std_mean": 0.11118160039186478,
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8772681474685669,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0419189453125,
"signal/advantage_abs_mean": 0.7730329275131226,
"signal/advantage_pre_scale_abs_mean": 0.06214970722794533,
"signal/advantage_pre_scale_std": 0.10454044938087463,
"signal/advantage_std": 0.9824274897575378,
"signal/batch_coverage_0/centered_abs_mean": 0.13734543919563294,
"signal/batch_coverage_0/group_std_mean": 0.1743619203567505,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.041223809123039246,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.001964039751328528,
"signal/batch_coverage_1/centered_abs_mean": 0.13734543919563294,
"signal/batch_coverage_1/group_std_mean": 0.1743619203567505,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.041223809123039246,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.001964039751328528,
"signal/batch_coverage_10/centered_abs_mean": 0.14575589895248414,
"signal/batch_coverage_10/group_std_mean": 0.18675495088100433,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.043697334825992584,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020843093283474446,
"signal/batch_coverage_15/centered_abs_mean": 0.14540058076381684,
"signal/batch_coverage_15/group_std_mean": 0.18637823164463044,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04360126554965973,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020792283583432438,
"signal/batch_coverage_20/centered_abs_mean": 0.14694512784481048,
"signal/batch_coverage_20/group_std_mean": 0.18890998363494874,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04405587539076805,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.002101315325126052,
"signal/batch_coverage_25/centered_abs_mean": 0.14675863683223725,
"signal/batch_coverage_25/group_std_mean": 0.18870731592178344,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04401228204369545,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002098648436367512,
"signal/batch_coverage_5/centered_abs_mean": 0.14158134162425995,
"signal/batch_coverage_5/group_std_mean": 0.18014432191848756,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04245214462280274,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020246132742613556,
"signal/brier_reward/centered_abs_mean": 0.0953995257616043,
"signal/brier_reward/group_std_mean": 0.12400536090135575,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19989734292030334,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009539952501654625,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014217448234558106,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017744265496730804,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029874777793884276,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001421744842082262,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2862050950527191,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3593140959739685,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6001887321472168,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028620509803295134,
"step": 260
},
{
"calibration/aurc": 0.27905522292217644,
"calibration/batch_distribution_entropy": 0.9632583305472959,
"calibration/buffer_distribution_entropy": 0.988097658160075,
"calibration/confidence_entropy": 0.45924203980536704,
"calibration/coverage@0%": 0.07421875,
"calibration/coverage@1%": 0.090234375,
"calibration/coverage@10%": 0.2296875,
"calibration/coverage@15%": 0.39765625,
"calibration/coverage@20%": 0.466796875,
"calibration/coverage@25%": 0.505859375,
"calibration/coverage@30%": 0.5421875,
"calibration/coverage@5%": 0.116015625,
"calibration/ece": 0.15995092522476045,
"calibration/mean_confidence": 0.5370619842603217,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 926.8,
"completions/max_terminated_length": 926.8,
"completions/mean_length": 250.95556640625,
"completions/mean_terminated_length": 251.10352478027343,
"completions/min_length": 25.0,
"completions/min_terminated_length": 125.6,
"epoch": 0.848,
"grad_norm": 0.006838792935013771,
"learning_rate": 1e-06,
"loss": -0.0009,
"num_tokens": 921818142.0,
"reward": 0.9706242799758911,
"reward_std": 0.07961578965187073,
"rewards/accuracy_reward": 0.54248046875,
"rewards/batch_coverage_0": 0.4266131818294525,
"rewards/batch_coverage_1": 0.4266131818294525,
"rewards/batch_coverage_10": 0.45701671838760377,
"rewards/batch_coverage_15": 0.4607600450515747,
"rewards/batch_coverage_20": 0.46680898666381837,
"rewards/batch_coverage_25": 0.4687318027019501,
"rewards/batch_coverage_5": 0.43967961668968203,
"rewards/brier_reward": 0.8209251642227173,
"rewards/confidence_uniqueness_reward": 0.9507812619209289,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_entropy_batch_reward": -0.2248463362455368,
"signal/accuracy_reward/centered_abs_mean": 0.075408935546875,
"signal/accuracy_reward/group_std_mean": 0.10307029783725738,
"signal/accuracy_reward/group_zero_std_frac": 0.69375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7802480101585388,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0377044677734375,
"signal/advantage_abs_mean": 0.763916802406311,
"signal/advantage_pre_scale_abs_mean": 0.059930368512868884,
"signal/advantage_pre_scale_std": 0.10379154682159424,
"signal/advantage_std": 0.9824334859848023,
"signal/batch_coverage_0/centered_abs_mean": 0.13436878621578216,
"signal/batch_coverage_0/group_std_mean": 0.17151402235031127,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04007416889071465,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019214736763387918,
"signal/batch_coverage_1/centered_abs_mean": 0.13436878621578216,
"signal/batch_coverage_1/group_std_mean": 0.17151402235031127,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04007416889071465,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019214736763387918,
"signal/batch_coverage_10/centered_abs_mean": 0.14380245208740233,
"signal/batch_coverage_10/group_std_mean": 0.18498321771621704,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04290038496255875,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.002056375052779913,
"signal/batch_coverage_15/centered_abs_mean": 0.14388126730918885,
"signal/batch_coverage_15/group_std_mean": 0.18545418381690978,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04295293316245079,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020575019530951976,
"signal/batch_coverage_20/centered_abs_mean": 0.14691962897777558,
"signal/batch_coverage_20/group_std_mean": 0.18964103162288665,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04389960765838623,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.00210095071233809,
"signal/batch_coverage_25/centered_abs_mean": 0.14909811317920685,
"signal/batch_coverage_25/group_std_mean": 0.19255113303661348,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04451926797628403,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021321029867976903,
"signal/batch_coverage_5/centered_abs_mean": 0.1378627151250839,
"signal/batch_coverage_5/group_std_mean": 0.17606137692928314,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04110623449087143,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.001971436757594347,
"signal/brier_reward/centered_abs_mean": 0.098325015604496,
"signal/brier_reward/group_std_mean": 0.1293652281165123,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20545124709606172,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009832501597702503,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013685688003897667,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01880355179309845,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028652074560523034,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013685687445104122,
"signal/format_reward/centered_abs_mean": 0.00113525390625,
"signal/format_reward/group_std_mean": 0.0033145629335194827,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011606083437800407,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2727161109447479,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3431577146053314,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5705787897109985,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027271611243486406,
"step": 265
},
{
"calibration/aurc": 0.2567896225592432,
"calibration/batch_distribution_entropy": 0.9399621317744724,
"calibration/buffer_distribution_entropy": 0.9888205668127513,
"calibration/confidence_entropy": 0.46848315954934144,
"calibration/coverage@0%": 0.010953583059936304,
"calibration/coverage@1%": 0.010953583059936304,
"calibration/coverage@10%": 0.18382429384425003,
"calibration/coverage@15%": 0.24530100953052453,
"calibration/coverage@20%": 0.28050934286385787,
"calibration/coverage@25%": 0.4496928600207206,
"calibration/coverage@30%": 0.6899701271775834,
"calibration/coverage@5%": 0.0937660830599363,
"calibration/ece": 0.14122779695873705,
"calibration/mean_confidence": 0.6172851125428632,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00068359375,
"completions/max_length": 1024.2,
"completions/max_terminated_length": 1024.2,
"completions/mean_length": 246.3564453125,
"completions/mean_terminated_length": 246.52633056640624,
"completions/min_length": 21.0,
"completions/min_terminated_length": 122.8,
"epoch": 0.864,
"grad_norm": 0.007001855410635471,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 939327648.0,
"reward": 0.9944598317146301,
"reward_std": 0.08389261215925217,
"rewards/accuracy_reward": 0.6052734375,
"rewards/batch_coverage_0": 0.4061899304389954,
"rewards/batch_coverage_1": 0.4061899304389954,
"rewards/batch_coverage_10": 0.43846317529678347,
"rewards/batch_coverage_15": 0.444266802072525,
"rewards/batch_coverage_20": 0.4477324843406677,
"rewards/batch_coverage_25": 0.45078660249710084,
"rewards/batch_coverage_5": 0.4176724016666412,
"rewards/brier_reward": 0.812709105014801,
"rewards/confidence_uniqueness_reward": 0.9477102398872376,
"rewards/format_reward": 0.99921875,
"rewards/frontier_entropy_batch_reward": -0.2688983857631683,
"signal/accuracy_reward/centered_abs_mean": 0.0864501953125,
"signal/accuracy_reward/group_std_mean": 0.1119039848446846,
"signal/accuracy_reward/group_zero_std_frac": 0.690625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8440386652946472,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04322509765625,
"signal/advantage_abs_mean": 0.7768685460090637,
"signal/advantage_pre_scale_abs_mean": 0.06493140161037445,
"signal/advantage_pre_scale_std": 0.10726050138473511,
"signal/advantage_std": 0.9825407266616821,
"signal/batch_coverage_0/centered_abs_mean": 0.1418234884738922,
"signal/batch_coverage_0/group_std_mean": 0.17914320528507233,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.040483998507261275,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0020280758617445827,
"signal/batch_coverage_1/centered_abs_mean": 0.1418234884738922,
"signal/batch_coverage_1/group_std_mean": 0.17914320528507233,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.040483998507261275,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0020280758617445827,
"signal/batch_coverage_10/centered_abs_mean": 0.15116735100746154,
"signal/batch_coverage_10/group_std_mean": 0.19266805946826934,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04304313659667969,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021616931073367594,
"signal/batch_coverage_15/centered_abs_mean": 0.1520349085330963,
"signal/batch_coverage_15/group_std_mean": 0.1941957652568817,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04327462837100029,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002174099301919341,
"signal/batch_coverage_20/centered_abs_mean": 0.15282979607582092,
"signal/batch_coverage_20/group_std_mean": 0.19580250084400178,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.0434773713350296,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.002185466093942523,
"signal/batch_coverage_25/centered_abs_mean": 0.15083151459693908,
"signal/batch_coverage_25/group_std_mean": 0.1939299464225769,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.042951537668704985,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002156890695914626,
"signal/batch_coverage_5/centered_abs_mean": 0.14520049095153809,
"signal/batch_coverage_5/group_std_mean": 0.18340204358100892,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04142942652106285,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020763671025633814,
"signal/brier_reward/centered_abs_mean": 0.10041524767875672,
"signal/brier_reward/group_std_mean": 0.13036752492189407,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19795797765254974,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010041524842381477,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015039702318608761,
"signal/confidence_uniqueness_reward/group_std_mean": 0.020908067747950555,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029788796231150626,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015039702178910376,
"signal/format_reward/centered_abs_mean": 0.00150146484375,
"signal/format_reward/group_std_mean": 0.00408310885541141,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014261398650705814,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000750732421875,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2864925265312195,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35601104497909547,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5675512373447418,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028649253770709038,
"step": 270
},
{
"calibration/aurc": 0.3391713719809212,
"calibration/batch_distribution_entropy": 0.9652309050420375,
"calibration/buffer_distribution_entropy": 0.9887706535251917,
"calibration/confidence_entropy": 0.44366992640643527,
"calibration/coverage@0%": 0.011349571078431372,
"calibration/coverage@1%": 0.011349571078431372,
"calibration/coverage@10%": 0.048590686274509805,
"calibration/coverage@15%": 0.12131127450980392,
"calibration/coverage@20%": 0.20539981617647057,
"calibration/coverage@25%": 0.30826746323529414,
"calibration/coverage@30%": 0.40834405637254906,
"calibration/coverage@5%": 0.02350643382352941,
"calibration/ece": 0.12908799058345438,
"calibration/mean_confidence": 0.49746322064845183,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00068359375,
"completions/max_length": 789.8,
"completions/max_terminated_length": 789.8,
"completions/mean_length": 241.1212890625,
"completions/mean_terminated_length": 241.28960876464845,
"completions/min_length": 42.2,
"completions/min_terminated_length": 117.0,
"epoch": 0.88,
"grad_norm": 0.00824071653187275,
"learning_rate": 1e-06,
"loss": -0.0031,
"num_tokens": 956943802.0,
"reward": 0.9564361095428466,
"reward_std": 0.08327271789312363,
"rewards/accuracy_reward": 0.523046875,
"rewards/batch_coverage_0": 0.42483463883399963,
"rewards/batch_coverage_1": 0.42483463883399963,
"rewards/batch_coverage_10": 0.44779890179634096,
"rewards/batch_coverage_15": 0.4517851173877716,
"rewards/batch_coverage_20": 0.45584348440170286,
"rewards/batch_coverage_25": 0.46056185364723207,
"rewards/batch_coverage_5": 0.4392197012901306,
"rewards/brier_reward": 0.8138040423393249,
"rewards/confidence_uniqueness_reward": 0.9495703697204589,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_entropy_batch_reward": -0.25482745170593263,
"signal/accuracy_reward/centered_abs_mean": 0.08291015625,
"signal/accuracy_reward/group_std_mean": 0.1109766572713852,
"signal/accuracy_reward/group_zero_std_frac": 0.678125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8130660891532898,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.041455078125,
"signal/advantage_abs_mean": 0.7675025463104248,
"signal/advantage_pre_scale_abs_mean": 0.0639819398522377,
"signal/advantage_pre_scale_std": 0.1059723898768425,
"signal/advantage_std": 0.9825579881668091,
"signal/batch_coverage_0/centered_abs_mean": 0.13871148526668547,
"signal/batch_coverage_0/group_std_mean": 0.17681510448455812,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03895730599761009,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019835742190480233,
"signal/batch_coverage_1/centered_abs_mean": 0.13871148526668547,
"signal/batch_coverage_1/group_std_mean": 0.17681510448455812,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03895730599761009,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019835742190480233,
"signal/batch_coverage_10/centered_abs_mean": 0.14528686702251434,
"signal/batch_coverage_10/group_std_mean": 0.18570451736450194,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.040792569518089294,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020776021527126433,
"signal/batch_coverage_15/centered_abs_mean": 0.1425869882106781,
"signal/batch_coverage_15/group_std_mean": 0.18342476487159728,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.040037110447883606,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002038993942551315,
"signal/batch_coverage_20/centered_abs_mean": 0.14162907898426055,
"signal/batch_coverage_20/group_std_mean": 0.18285691738128662,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03976234272122383,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020252959337085487,
"signal/batch_coverage_25/centered_abs_mean": 0.14475294947624207,
"signal/batch_coverage_25/group_std_mean": 0.1870112508535385,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04063992574810982,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020699672866612675,
"signal/batch_coverage_5/centered_abs_mean": 0.14309484958648683,
"signal/batch_coverage_5/group_std_mean": 0.1825674444437027,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04018484801054001,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002046256372705102,
"signal/brier_reward/centered_abs_mean": 0.10405312627553939,
"signal/brier_reward/group_std_mean": 0.13430474400520326,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20426113307476043,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010405313037335873,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014735890924930573,
"signal/confidence_uniqueness_reward/group_std_mean": 0.020338327810168266,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028994759172201158,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014735890785232187,
"signal/format_reward/centered_abs_mean": 0.001324462890625,
"signal/format_reward/group_std_mean": 0.003866990189999342,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013323342800140381,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29404598474502563,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3665463447570801,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5783021211624145,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029404599219560623,
"step": 275
},
{
"calibration/aurc": 0.31771111015887915,
"calibration/batch_distribution_entropy": 0.974324873774067,
"calibration/buffer_distribution_entropy": 0.9886315735465725,
"calibration/confidence_entropy": 0.4536923645244853,
"calibration/coverage@0%": 0.04219361545988258,
"calibration/coverage@1%": 0.04609986545988258,
"calibration/coverage@10%": 0.15392153864970645,
"calibration/coverage@15%": 0.19650577910958905,
"calibration/coverage@20%": 0.34651877446183954,
"calibration/coverage@25%": 0.4226929427592955,
"calibration/coverage@30%": 0.4926293419765166,
"calibration/coverage@5%": 0.06133424045988258,
"calibration/ece": 0.18039484157785943,
"calibration/mean_confidence": 0.5008349061997756,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 936.6,
"completions/max_terminated_length": 936.6,
"completions/mean_length": 239.8802734375,
"completions/mean_terminated_length": 239.9035400390625,
"completions/min_length": 99.8,
"completions/min_terminated_length": 122.0,
"epoch": 0.896,
"grad_norm": 0.006939719431102276,
"learning_rate": 1e-06,
"loss": 0.0023,
"num_tokens": 974511024.0,
"reward": 0.9786574125289917,
"reward_std": 0.07587149292230606,
"rewards/accuracy_reward": 0.56845703125,
"rewards/batch_coverage_0": 0.41372602581977846,
"rewards/batch_coverage_1": 0.41372602581977846,
"rewards/batch_coverage_10": 0.449049574136734,
"rewards/batch_coverage_15": 0.45231945514678956,
"rewards/batch_coverage_20": 0.457261198759079,
"rewards/batch_coverage_25": 0.45867209434509276,
"rewards/batch_coverage_5": 0.4332360863685608,
"rewards/brier_reward": 0.8007945418357849,
"rewards/confidence_uniqueness_reward": 0.950228500366211,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_entropy_batch_reward": -0.24639837741851806,
"signal/accuracy_reward/centered_abs_mean": 0.072772216796875,
"signal/accuracy_reward/group_std_mean": 0.10187921673059464,
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7487026810646057,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0363861083984375,
"signal/advantage_abs_mean": 0.7618164420127869,
"signal/advantage_pre_scale_abs_mean": 0.05696792304515839,
"signal/advantage_pre_scale_std": 0.09597984254360199,
"signal/advantage_std": 0.9824573516845703,
"signal/batch_coverage_0/centered_abs_mean": 0.13853881061077117,
"signal/batch_coverage_0/group_std_mean": 0.1770318329334259,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.040990565717220304,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.001981105003505945,
"signal/batch_coverage_1/centered_abs_mean": 0.13853881061077117,
"signal/batch_coverage_1/group_std_mean": 0.1770318329334259,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.040990565717220304,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.001981105003505945,
"signal/batch_coverage_10/centered_abs_mean": 0.14625149667263032,
"signal/batch_coverage_10/group_std_mean": 0.18902421295642852,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.043303582072258,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.002091396367177367,
"signal/batch_coverage_15/centered_abs_mean": 0.14543745517730713,
"signal/batch_coverage_15/group_std_mean": 0.1879607081413269,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04308133721351624,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020797555334866047,
"signal/batch_coverage_20/centered_abs_mean": 0.14614311456680298,
"signal/batch_coverage_20/group_std_mean": 0.1893752932548523,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04328845590353012,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020898465532809495,
"signal/batch_coverage_25/centered_abs_mean": 0.14725618660449982,
"signal/batch_coverage_25/group_std_mean": 0.19075983464717866,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.043608113378286364,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021057635080069304,
"signal/batch_coverage_5/centered_abs_mean": 0.14275825321674346,
"signal/batch_coverage_5/group_std_mean": 0.1829033762216568,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04225177988409996,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020414429251104592,
"signal/brier_reward/centered_abs_mean": 0.09953672885894775,
"signal/brier_reward/group_std_mean": 0.1298005163669586,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20504209995269776,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009953673183917999,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013294638693332672,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016960232332348824,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027427341789007187,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013294639065861702,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0018184378743171692,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2813436031341553,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35305030941963195,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5798410654067994,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028134360536932947,
"step": 280
},
{
"calibration/aurc": 0.30828852543253227,
"calibration/batch_distribution_entropy": 0.9660153375103102,
"calibration/buffer_distribution_entropy": 0.988424190292647,
"calibration/confidence_entropy": 0.45257516243344964,
"calibration/coverage@0%": 0.034765625,
"calibration/coverage@1%": 0.034765625,
"calibration/coverage@10%": 0.158203125,
"calibration/coverage@15%": 0.280859375,
"calibration/coverage@20%": 0.374609375,
"calibration/coverage@25%": 0.45390625,
"calibration/coverage@30%": 0.54609375,
"calibration/coverage@5%": 0.116796875,
"calibration/ece": 0.1127079457086764,
"calibration/mean_confidence": 0.5327337166604502,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 794.4,
"completions/max_terminated_length": 794.4,
"completions/mean_length": 238.48564453125,
"completions/mean_terminated_length": 238.6263916015625,
"completions/min_length": 24.0,
"completions/min_terminated_length": 120.6,
"epoch": 0.912,
"grad_norm": 0.0068150414153933525,
"learning_rate": 1e-06,
"loss": -0.0043,
"num_tokens": 992004413.0,
"reward": 0.9777844548225403,
"reward_std": 0.07660511136054993,
"rewards/accuracy_reward": 0.558203125,
"rewards/batch_coverage_0": 0.4496914088726044,
"rewards/batch_coverage_1": 0.4496914088726044,
"rewards/batch_coverage_10": 0.4754992663860321,
"rewards/batch_coverage_15": 0.4782718360424042,
"rewards/batch_coverage_20": 0.48222748637199403,
"rewards/batch_coverage_25": 0.4871933341026306,
"rewards/batch_coverage_5": 0.4609818339347839,
"rewards/brier_reward": 0.8314094185829163,
"rewards/confidence_uniqueness_reward": 0.947171938419342,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_entropy_batch_reward": -0.25837140083312987,
"signal/accuracy_reward/centered_abs_mean": 0.07244873046875,
"signal/accuracy_reward/group_std_mean": 0.09671394675970077,
"signal/accuracy_reward/group_zero_std_frac": 0.71875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7836610794067382,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.036224365234375,
"signal/advantage_abs_mean": 0.7689519405364991,
"signal/advantage_pre_scale_abs_mean": 0.05903713330626488,
"signal/advantage_pre_scale_std": 0.10071672052145005,
"signal/advantage_std": 0.9823365330696106,
"signal/batch_coverage_0/centered_abs_mean": 0.14369446337223052,
"signal/batch_coverage_0/group_std_mean": 0.18067781627178192,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04487623497843742,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.002054830873385072,
"signal/batch_coverage_1/centered_abs_mean": 0.14369446337223052,
"signal/batch_coverage_1/group_std_mean": 0.18067781627178192,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04487623497843742,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.002054830873385072,
"signal/batch_coverage_10/centered_abs_mean": 0.14944251179695128,
"signal/batch_coverage_10/group_std_mean": 0.18889077603816987,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04679513275623322,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021370279137045145,
"signal/batch_coverage_15/centered_abs_mean": 0.15104621648788452,
"signal/batch_coverage_15/group_std_mean": 0.19096899330615996,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.0472504124045372,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002159960940480232,
"signal/batch_coverage_20/centered_abs_mean": 0.15283162593841554,
"signal/batch_coverage_20/group_std_mean": 0.1936648577451706,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.047779142111539843,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021854921244084833,
"signal/batch_coverage_25/centered_abs_mean": 0.15555560290813447,
"signal/batch_coverage_25/group_std_mean": 0.19730362594127654,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04867595061659813,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002224445063620806,
"signal/batch_coverage_5/centered_abs_mean": 0.14733441770076752,
"signal/batch_coverage_5/group_std_mean": 0.18538997173309327,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.046044493466615675,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002106882119551301,
"signal/brier_reward/centered_abs_mean": 0.0970422387123108,
"signal/brier_reward/group_std_mean": 0.12534932047128677,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.211318901181221,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.00970422402024269,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01565753761678934,
"signal/confidence_uniqueness_reward/group_std_mean": 0.021441229432821274,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03439625911414623,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015657537849619984,
"signal/format_reward/centered_abs_mean": 0.00113525390625,
"signal/format_reward/group_std_mean": 0.0033145629335194827,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011679522693157196,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2794118285179138,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3517456650733948,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6118232488632203,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027941183745861055,
"step": 285
},
{
"calibration/aurc": 0.3555222457423709,
"calibration/batch_distribution_entropy": 0.9740519445999197,
"calibration/buffer_distribution_entropy": 0.9891156696064547,
"calibration/confidence_entropy": 0.48150519224400945,
"calibration/coverage@0%": 0.0234375,
"calibration/coverage@1%": 0.0234375,
"calibration/coverage@10%": 0.089453125,
"calibration/coverage@15%": 0.145703125,
"calibration/coverage@20%": 0.18515625,
"calibration/coverage@25%": 0.230078125,
"calibration/coverage@30%": 0.3078125,
"calibration/coverage@5%": 0.041015625,
"calibration/ece": 0.10356664177993731,
"calibration/mean_confidence": 0.4613492720779721,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 831.0,
"completions/max_terminated_length": 831.0,
"completions/mean_length": 236.5126953125,
"completions/mean_terminated_length": 236.53578491210936,
"completions/min_length": 98.2,
"completions/min_terminated_length": 121.4,
"epoch": 0.928,
"grad_norm": 0.006446013692766428,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 1009453119.0,
"reward": 0.9647042751312256,
"reward_std": 0.07521760016679764,
"rewards/accuracy_reward": 0.5376953125,
"rewards/batch_coverage_0": 0.4267537951469421,
"rewards/batch_coverage_1": 0.4267537951469421,
"rewards/batch_coverage_10": 0.45175575017929076,
"rewards/batch_coverage_15": 0.4577596127986908,
"rewards/batch_coverage_20": 0.46226202249526976,
"rewards/batch_coverage_25": 0.4647360146045685,
"rewards/batch_coverage_5": 0.4439967393875122,
"rewards/brier_reward": 0.8195161938667297,
"rewards/confidence_uniqueness_reward": 0.9499494791030884,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_entropy_batch_reward": -0.25857561230659487,
"signal/accuracy_reward/centered_abs_mean": 0.07042236328125,
"signal/accuracy_reward/group_std_mean": 0.09534862488508225,
"signal/accuracy_reward/group_zero_std_frac": 0.715625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6946340084075928,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.035211181640625,
"signal/advantage_abs_mean": 0.7717218399047852,
"signal/advantage_pre_scale_abs_mean": 0.058029332756996156,
"signal/advantage_pre_scale_std": 0.09526041895151138,
"signal/advantage_std": 0.9825134754180909,
"signal/batch_coverage_0/centered_abs_mean": 0.14040791988372803,
"signal/batch_coverage_0/group_std_mean": 0.17896082103252411,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.040377072244882586,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.002007833169773221,
"signal/batch_coverage_1/centered_abs_mean": 0.14040791988372803,
"signal/batch_coverage_1/group_std_mean": 0.17896082103252411,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.040377072244882586,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.002007833169773221,
"signal/batch_coverage_10/centered_abs_mean": 0.14643791019916536,
"signal/batch_coverage_10/group_std_mean": 0.18706798255443574,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04210822582244873,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.002094061998650432,
"signal/batch_coverage_15/centered_abs_mean": 0.14486917555332185,
"signal/batch_coverage_15/group_std_mean": 0.1849692642688751,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.041620150208473206,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002071629255078733,
"signal/batch_coverage_20/centered_abs_mean": 0.14694713354110717,
"signal/batch_coverage_20/group_std_mean": 0.18772556483745576,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04221231043338776,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.002101344126276672,
"signal/batch_coverage_25/centered_abs_mean": 0.14691944420337677,
"signal/batch_coverage_25/group_std_mean": 0.18790694773197175,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04220572412014008,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002100948058068752,
"signal/batch_coverage_5/centered_abs_mean": 0.1435709625482559,
"signal/batch_coverage_5/group_std_mean": 0.18303219974040985,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04129085242748261,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002053064666688442,
"signal/brier_reward/centered_abs_mean": 0.0938475176692009,
"signal/brier_reward/group_std_mean": 0.1227958619594574,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18740336000919341,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009384752437472343,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01345120333135128,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017086662724614142,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027048880234360695,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013451203238219022,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0017758136615157127,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29375959038734434,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36533265709877016,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5884610295295716,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029375958442687988,
"step": 290
},
{
"calibration/aurc": 0.2229863112287252,
"calibration/batch_distribution_entropy": 0.9637147094213729,
"calibration/buffer_distribution_entropy": 0.9898969165099345,
"calibration/confidence_entropy": 0.45262899153877767,
"calibration/coverage@0%": 0.0421875,
"calibration/coverage@1%": 0.0421875,
"calibration/coverage@10%": 0.32890625,
"calibration/coverage@15%": 0.410546875,
"calibration/coverage@20%": 0.523828125,
"calibration/coverage@25%": 0.603125,
"calibration/coverage@30%": 0.683984375,
"calibration/coverage@5%": 0.187109375,
"calibration/ece": 0.09124665911387694,
"calibration/mean_confidence": 0.48950484950374407,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 635.6,
"completions/max_terminated_length": 635.6,
"completions/mean_length": 236.31083984375,
"completions/mean_terminated_length": 236.3336669921875,
"completions/min_length": 96.2,
"completions/min_terminated_length": 116.6,
"epoch": 0.944,
"grad_norm": 0.0067446487955749035,
"learning_rate": 1e-06,
"loss": 0.0053,
"num_tokens": 1026848366.0,
"reward": 0.9670644760131836,
"reward_std": 0.08264310508966446,
"rewards/accuracy_reward": 0.54658203125,
"rewards/batch_coverage_0": 0.41733229756355283,
"rewards/batch_coverage_1": 0.41733229756355283,
"rewards/batch_coverage_10": 0.45554951429367063,
"rewards/batch_coverage_15": 0.457538378238678,
"rewards/batch_coverage_20": 0.46042126417160034,
"rewards/batch_coverage_25": 0.46144301891326905,
"rewards/batch_coverage_5": 0.4349259316921234,
"rewards/brier_reward": 0.8223084926605224,
"rewards/confidence_uniqueness_reward": 0.9484071016311646,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_entropy_batch_reward": -0.2764424979686737,
"signal/accuracy_reward/centered_abs_mean": 0.087493896484375,
"signal/accuracy_reward/group_std_mean": 0.1149898737668991,
"signal/accuracy_reward/group_zero_std_frac": 0.675,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8754186749458313,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0437469482421875,
"signal/advantage_abs_mean": 0.7771499872207641,
"signal/advantage_pre_scale_abs_mean": 0.06411685273051262,
"signal/advantage_pre_scale_std": 0.10613652616739273,
"signal/advantage_std": 0.9825153589248657,
"signal/batch_coverage_0/centered_abs_mean": 0.13396848738193512,
"signal/batch_coverage_0/group_std_mean": 0.16862494945526124,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.038404418528079985,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019157493952661753,
"signal/batch_coverage_1/centered_abs_mean": 0.13396848738193512,
"signal/batch_coverage_1/group_std_mean": 0.16862494945526124,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.038404418528079985,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019157493952661753,
"signal/batch_coverage_10/centered_abs_mean": 0.143884015083313,
"signal/batch_coverage_10/group_std_mean": 0.18276259899139405,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04123903587460518,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020575413946062326,
"signal/batch_coverage_15/centered_abs_mean": 0.1433136433362961,
"signal/batch_coverage_15/group_std_mean": 0.18256149888038636,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.041087044030427934,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0020493850577622654,
"signal/batch_coverage_20/centered_abs_mean": 0.14255596399307252,
"signal/batch_coverage_20/group_std_mean": 0.18211045265197753,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04089748486876488,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020385502837598326,
"signal/batch_coverage_25/centered_abs_mean": 0.14203818142414093,
"signal/batch_coverage_25/group_std_mean": 0.1817559063434601,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04073741212487221,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002031145920045674,
"signal/batch_coverage_5/centered_abs_mean": 0.1392223507165909,
"signal/batch_coverage_5/group_std_mean": 0.175718292593956,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03989290744066239,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0019908795831725,
"signal/brier_reward/centered_abs_mean": 0.09500806033611298,
"signal/brier_reward/group_std_mean": 0.1222013533115387,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19000862538814545,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009500806406140327,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014246679656207562,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018000596016645432,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028644410893321037,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014246679609641432,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0019225865602493287,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2949823498725891,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36604434847831724,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5930037498474121,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029498234018683435,
"step": 295
},
{
"calibration/aurc": 0.3204940128617194,
"calibration/batch_distribution_entropy": 0.9580938856768244,
"calibration/buffer_distribution_entropy": 0.9896996163940806,
"calibration/confidence_entropy": 0.4399139243375204,
"calibration/coverage@0%": 0.005078125,
"calibration/coverage@1%": 0.005078125,
"calibration/coverage@10%": 0.10859375,
"calibration/coverage@15%": 0.242578125,
"calibration/coverage@20%": 0.275390625,
"calibration/coverage@25%": 0.356640625,
"calibration/coverage@30%": 0.4125,
"calibration/coverage@5%": 0.005078125,
"calibration/ece": 0.1301704799550848,
"calibration/mean_confidence": 0.5383094842630045,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 615.8,
"completions/max_terminated_length": 615.8,
"completions/mean_length": 232.53017578125,
"completions/mean_terminated_length": 232.53017578125,
"completions/min_length": 123.4,
"completions/min_terminated_length": 123.4,
"epoch": 0.96,
"grad_norm": 0.0065346090123057365,
"learning_rate": 1e-06,
"loss": -0.002,
"num_tokens": 1044169795.0,
"reward": 0.9667318820953369,
"reward_std": 0.07006315439939499,
"rewards/accuracy_reward": 0.53857421875,
"rewards/batch_coverage_0": 0.451965457201004,
"rewards/batch_coverage_1": 0.451965457201004,
"rewards/batch_coverage_10": 0.487179833650589,
"rewards/batch_coverage_15": 0.4893311381340027,
"rewards/batch_coverage_20": 0.49355667233467104,
"rewards/batch_coverage_25": 0.4953798890113831,
"rewards/batch_coverage_5": 0.4693809628486633,
"rewards/brier_reward": 0.8260141253471375,
"rewards/confidence_uniqueness_reward": 0.9484649658203125,
"rewards/format_reward": 1.0,
"rewards/frontier_entropy_batch_reward": -0.27747427225112914,
"signal/accuracy_reward/centered_abs_mean": 0.061724853515625,
"signal/accuracy_reward/group_std_mean": 0.08646569401025772,
"signal/accuracy_reward/group_zero_std_frac": 0.734375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6621787786483765,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0308624267578125,
"signal/advantage_abs_mean": 0.7726797819137573,
"signal/advantage_pre_scale_abs_mean": 0.05308959484100342,
"signal/advantage_pre_scale_std": 0.08946077674627304,
"signal/advantage_std": 0.9823702216148377,
"signal/batch_coverage_0/centered_abs_mean": 0.1396566614508629,
"signal/batch_coverage_0/group_std_mean": 0.17796612679958343,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.043179111927747725,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019970902940258384,
"signal/batch_coverage_1/centered_abs_mean": 0.1396566614508629,
"signal/batch_coverage_1/group_std_mean": 0.17796612679958343,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.043179111927747725,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019970902940258384,
"signal/batch_coverage_10/centered_abs_mean": 0.14766047298908233,
"signal/batch_coverage_10/group_std_mean": 0.18960849642753602,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04558332860469818,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021115447394549846,
"signal/batch_coverage_15/centered_abs_mean": 0.14853170812129973,
"signal/batch_coverage_15/group_std_mean": 0.1907793253660202,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04583237245678902,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002124003367498517,
"signal/batch_coverage_20/centered_abs_mean": 0.15066201984882355,
"signal/batch_coverage_20/group_std_mean": 0.1938926547765732,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04643941894173622,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021544668823480608,
"signal/batch_coverage_25/centered_abs_mean": 0.14919013679027557,
"signal/batch_coverage_25/group_std_mean": 0.1926431655883789,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04591233804821968,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002133418945595622,
"signal/batch_coverage_5/centered_abs_mean": 0.1437687397003174,
"signal/batch_coverage_5/group_std_mean": 0.18378321528434755,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04439370557665825,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020558929536491632,
"signal/brier_reward/centered_abs_mean": 0.09213972389698029,
"signal/brier_reward/group_std_mean": 0.11985861659049987,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1980680286884308,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009213972836732864,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014541387557983398,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018049951083958148,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03146158419549465,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014541388023644685,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30066679120063783,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37262142896652223,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6474065899848938,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030066679790616037,
"step": 300
},
{
"epoch": 0.96,
"eval_calibration/aurc": 0.4146560871220133,
"eval_calibration/batch_distribution_entropy": 0.9208862479532871,
"eval_calibration/buffer_distribution_entropy": 0.9888979645233786,
"eval_calibration/confidence_entropy": 0.4375698210434617,
"eval_calibration/coverage@0%": 0.078125,
"eval_calibration/coverage@1%": 0.078125,
"eval_calibration/coverage@10%": 0.078125,
"eval_calibration/coverage@15%": 0.0859375,
"eval_calibration/coverage@20%": 0.265625,
"eval_calibration/coverage@25%": 0.34375,
"eval_calibration/coverage@30%": 0.390625,
"eval_calibration/coverage@5%": 0.078125,
"eval_calibration/ece": 0.18276220207815103,
"eval_calibration/mean_confidence": 0.4358244321820579,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 424.0,
"eval_completions/max_terminated_length": 424.0,
"eval_completions/mean_length": 233.96672821044922,
"eval_completions/mean_terminated_length": 233.96672821044922,
"eval_completions/min_length": 136.5,
"eval_completions/min_terminated_length": 136.5,
"eval_loss": 0.0,
"eval_num_tokens": 1044169795.0,
"eval_reward": 0.8108105212450027,
"eval_reward_std": 0.2273394763469696,
"eval_rewards/accuracy_reward": 0.44921875,
"eval_rewards/batch_coverage_0": 0.17630108073353767,
"eval_rewards/batch_coverage_1": 0.17630108073353767,
"eval_rewards/batch_coverage_10": 0.17482585459947586,
"eval_rewards/batch_coverage_15": 0.16643786057829857,
"eval_rewards/batch_coverage_20": 0.12921502068638802,
"eval_rewards/batch_coverage_25": 0.10374573059380054,
"eval_rewards/batch_coverage_5": 0.17630108073353767,
"eval_rewards/brier_reward": 0.8116861432790756,
"eval_rewards/confidence_uniqueness_reward": 0.892578125,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 21.4639,
"eval_samples_per_second": 23.295,
"eval_signal/accuracy_reward/centered_abs_mean": 0.47900390625,
"eval_signal/accuracy_reward/group_std_mean": 0.4969187304377556,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0559919476509094,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.239501953125,
"eval_signal/advantage_abs_mean": 0.9430637061595917,
"eval_signal/advantage_pre_scale_abs_mean": 0.21488191559910774,
"eval_signal/advantage_pre_scale_std": 0.2248552180826664,
"eval_signal/advantage_std": 0.9876824915409088,
"eval_signal/batch_coverage_0/centered_abs_mean": 0.3311324641108513,
"eval_signal/batch_coverage_0/group_std_mean": 0.3937782421708107,
"eval_signal/batch_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.021000605076551437,
"eval_signal/batch_coverage_0/weight": 0.014299999922513962,
"eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.004735194204840809,
"eval_signal/batch_coverage_1/centered_abs_mean": 0.3311324641108513,
"eval_signal/batch_coverage_1/group_std_mean": 0.3937782421708107,
"eval_signal/batch_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.021000605076551437,
"eval_signal/batch_coverage_1/weight": 0.014299999922513962,
"eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.004735194204840809,
"eval_signal/batch_coverage_10/centered_abs_mean": 0.325124341994524,
"eval_signal/batch_coverage_10/group_std_mean": 0.3854726776480675,
"eval_signal/batch_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.020634129643440247,
"eval_signal/batch_coverage_10/weight": 0.014299999922513962,
"eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.004649278009310365,
"eval_signal/batch_coverage_15/centered_abs_mean": 0.28606269136071205,
"eval_signal/batch_coverage_15/group_std_mean": 0.3394397348165512,
"eval_signal/batch_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01813029870390892,
"eval_signal/batch_coverage_15/weight": 0.014299999922513962,
"eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.004090696515049785,
"eval_signal/batch_coverage_20/centered_abs_mean": 0.19810736551880836,
"eval_signal/batch_coverage_20/group_std_mean": 0.2412240207195282,
"eval_signal/batch_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012629861943423748,
"eval_signal/batch_coverage_20/weight": 0.014299999922513962,
"eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.0028329353663139045,
"eval_signal/batch_coverage_25/centered_abs_mean": 0.16219930350780487,
"eval_signal/batch_coverage_25/group_std_mean": 0.2030845247209072,
"eval_signal/batch_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.010249435435980558,
"eval_signal/batch_coverage_25/weight": 0.014299999922513962,
"eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.0023194500245153904,
"eval_signal/batch_coverage_5/centered_abs_mean": 0.3311324641108513,
"eval_signal/batch_coverage_5/group_std_mean": 0.3937782421708107,
"eval_signal/batch_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.021000605076551437,
"eval_signal/batch_coverage_5/weight": 0.014299999922513962,
"eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.004735194204840809,
"eval_signal/brier_reward/centered_abs_mean": 0.18613890558481216,
"eval_signal/brier_reward/group_std_mean": 0.24047620594501495,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0822746567428112,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01861389074474573,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04364013671875,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0524612283334136,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01921792607754469,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004364013671875,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.186,
"step": 300
},
{
"calibration/aurc": 0.23395873459072755,
"calibration/batch_distribution_entropy": 0.969196375655712,
"calibration/buffer_distribution_entropy": 0.9887961994688961,
"calibration/confidence_entropy": 0.4758519319323664,
"calibration/coverage@0%": 0.05312958659491194,
"calibration/coverage@1%": 0.05312958659491194,
"calibration/coverage@10%": 0.3391496453033268,
"calibration/coverage@15%": 0.4352747370352251,
"calibration/coverage@20%": 0.51537655944227,
"calibration/coverage@25%": 0.5962818003913894,
"calibration/coverage@30%": 0.6365429305283757,
"calibration/coverage@5%": 0.19614878913894324,
"calibration/ece": 0.14624253957562217,
"calibration/mean_confidence": 0.4805521077708187,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 628.2,
"completions/max_terminated_length": 628.2,
"completions/mean_length": 235.7103515625,
"completions/mean_terminated_length": 235.73360595703124,
"completions/min_length": 97.2,
"completions/min_terminated_length": 123.4,
"epoch": 0.976,
"grad_norm": 0.008149566128849983,
"learning_rate": 1e-06,
"loss": -0.0004,
"num_tokens": 1061444589.0,
"reward": 0.9804271697998047,
"reward_std": 0.07212701141834259,
"rewards/accuracy_reward": 0.57001953125,
"rewards/batch_coverage_0": 0.4203511416912079,
"rewards/batch_coverage_1": 0.4203511416912079,
"rewards/batch_coverage_10": 0.4480322599411011,
"rewards/batch_coverage_15": 0.4536835730075836,
"rewards/batch_coverage_20": 0.4561052739620209,
"rewards/batch_coverage_25": 0.45771525502204896,
"rewards/batch_coverage_5": 0.43514758348464966,
"rewards/brier_reward": 0.8058765411376954,
"rewards/confidence_uniqueness_reward": 0.9504483699798584,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_entropy_batch_reward": -0.24373058676719667,
"signal/accuracy_reward/centered_abs_mean": 0.066033935546875,
"signal/accuracy_reward/group_std_mean": 0.09485945627093315,
"signal/accuracy_reward/group_zero_std_frac": 0.7,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6698218882083893,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0330169677734375,
"signal/advantage_abs_mean": 0.762388014793396,
"signal/advantage_pre_scale_abs_mean": 0.05398530513048172,
"signal/advantage_pre_scale_std": 0.08997991234064102,
"signal/advantage_std": 0.9824227690696716,
"signal/batch_coverage_0/centered_abs_mean": 0.1356405645608902,
"signal/batch_coverage_0/group_std_mean": 0.17252211272716522,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0407505564391613,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019396600546315312,
"signal/batch_coverage_1/centered_abs_mean": 0.1356405645608902,
"signal/batch_coverage_1/group_std_mean": 0.17252211272716522,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0407505564391613,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019396600546315312,
"signal/batch_coverage_10/centered_abs_mean": 0.1435418903827667,
"signal/batch_coverage_10/group_std_mean": 0.18347274363040925,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.043130411952733996,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0020526490174233914,
"signal/batch_coverage_15/centered_abs_mean": 0.14430699050426482,
"signal/batch_coverage_15/group_std_mean": 0.18479589521884918,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.043363725394010545,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.002063590008765459,
"signal/batch_coverage_20/centered_abs_mean": 0.14351218342781066,
"signal/batch_coverage_20/group_std_mean": 0.18394066393375397,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04315835386514664,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0020522242411971092,
"signal/batch_coverage_25/centered_abs_mean": 0.144418603181839,
"signal/batch_coverage_25/group_std_mean": 0.18517533540725709,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04345187172293663,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.002065186039544642,
"signal/batch_coverage_5/centered_abs_mean": 0.13880332112312316,
"signal/batch_coverage_5/group_std_mean": 0.17648251950740815,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.041730723530054095,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.001984887500293553,
"signal/brier_reward/centered_abs_mean": 0.09185196608304977,
"signal/brier_reward/group_std_mean": 0.11965415328741073,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19028878211975098,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009185196924954652,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012751653417944908,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01619891356676817,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026918485760688782,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012751653557643294,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.002051408402621746,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27679008841514585,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34837120175361636,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5824168384075165,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02767900936305523,
"step": 305
},
{
"calibration/aurc": 0.35057905085704577,
"calibration/batch_distribution_entropy": 0.9700197131768388,
"calibration/buffer_distribution_entropy": 0.9887671953972358,
"calibration/confidence_entropy": 0.464604167881242,
"calibration/coverage@0%": 0.00859375,
"calibration/coverage@1%": 0.00859375,
"calibration/coverage@10%": 0.016796875,
"calibration/coverage@15%": 0.072265625,
"calibration/coverage@20%": 0.273046875,
"calibration/coverage@25%": 0.3640625,
"calibration/coverage@30%": 0.45546875,
"calibration/coverage@5%": 0.00859375,
"calibration/ece": 0.14386630288195734,
"calibration/mean_confidence": 0.5126978774074185,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 609.0,
"completions/max_terminated_length": 609.0,
"completions/mean_length": 233.7537109375,
"completions/mean_terminated_length": 233.7537109375,
"completions/min_length": 123.8,
"completions/min_terminated_length": 123.8,
"epoch": 0.992,
"grad_norm": 0.0069343592040240765,
"learning_rate": 1e-06,
"loss": -0.0023,
"num_tokens": 1078966707.0,
"reward": 0.9605294466018677,
"reward_std": 0.07477787286043167,
"rewards/accuracy_reward": 0.530078125,
"rewards/batch_coverage_0": 0.43314738273620607,
"rewards/batch_coverage_1": 0.43314738273620607,
"rewards/batch_coverage_10": 0.45744775533676146,
"rewards/batch_coverage_15": 0.4635770797729492,
"rewards/batch_coverage_20": 0.46865540742874146,
"rewards/batch_coverage_25": 0.47068612575531005,
"rewards/batch_coverage_5": 0.44648249745368956,
"rewards/brier_reward": 0.8139971017837524,
"rewards/confidence_uniqueness_reward": 0.9495025634765625,
"rewards/format_reward": 1.0,
"rewards/frontier_entropy_batch_reward": -0.2623551905155182,
"signal/accuracy_reward/centered_abs_mean": 0.069140625,
"signal/accuracy_reward/group_std_mean": 0.09174189940094948,
"signal/accuracy_reward/group_zero_std_frac": 0.734375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7008903384208679,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0345703125,
"signal/advantage_abs_mean": 0.7781669855117798,
"signal/advantage_pre_scale_abs_mean": 0.05807532519102097,
"signal/advantage_pre_scale_std": 0.09566855132579803,
"signal/advantage_std": 0.9824541807174683,
"signal/batch_coverage_0/centered_abs_mean": 0.14170411229133606,
"signal/batch_coverage_0/group_std_mean": 0.17635067999362947,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.041892097890377046,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0020263687707483768,
"signal/batch_coverage_1/centered_abs_mean": 0.14170411229133606,
"signal/batch_coverage_1/group_std_mean": 0.17635067999362947,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.041892097890377046,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0020263687707483768,
"signal/batch_coverage_10/centered_abs_mean": 0.14685550332069397,
"signal/batch_coverage_10/group_std_mean": 0.18372409641742707,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04345771968364716,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021000336622819305,
"signal/batch_coverage_15/centered_abs_mean": 0.14845097064971924,
"signal/batch_coverage_15/group_std_mean": 0.18660386502742768,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.043903425335884094,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0021228487603366375,
"signal/batch_coverage_20/centered_abs_mean": 0.14924948811531066,
"signal/batch_coverage_20/group_std_mean": 0.18820964694023132,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04425108656287193,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.0021342677529901264,
"signal/batch_coverage_25/centered_abs_mean": 0.14973369836807252,
"signal/batch_coverage_25/group_std_mean": 0.1890793949365616,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04441794827580452,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0021411918569356203,
"signal/batch_coverage_5/centered_abs_mean": 0.1451415926218033,
"signal/batch_coverage_5/group_std_mean": 0.18125473260879515,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04290560409426689,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002075524744577706,
"signal/brier_reward/centered_abs_mean": 0.09672370105981827,
"signal/brier_reward/group_std_mean": 0.1231953427195549,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1986709266901016,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.00967237027361989,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013905191421508789,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01732446514070034,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02885650247335434,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001390519179403782,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2993383765220642,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37033950686454775,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6192003607749939,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029933837801218034,
"step": 310
},
{
"calibration/aurc": 0.24144867169583295,
"calibration/batch_distribution_entropy": 0.9655930825339882,
"calibration/buffer_distribution_entropy": 0.9890472365063823,
"calibration/confidence_entropy": 0.4795974319609599,
"calibration/coverage@0%": 0.0029296875,
"calibration/coverage@1%": 0.0029296875,
"calibration/coverage@10%": 0.08984375,
"calibration/coverage@15%": 0.15234375,
"calibration/coverage@20%": 0.3466796875,
"calibration/coverage@25%": 0.658203125,
"calibration/coverage@30%": 0.7861328125,
"calibration/coverage@5%": 0.0595703125,
"calibration/ece": 0.14323620346773447,
"calibration/mean_confidence": 0.6021585478088081,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 556.5,
"completions/max_terminated_length": 556.5,
"completions/mean_length": 241.6327667236328,
"completions/mean_terminated_length": 241.6327667236328,
"completions/min_length": 111.5,
"completions/min_terminated_length": 111.5,
"epoch": 0.9984,
"num_tokens": 1085956914.0,
"reward": 0.9854284524917603,
"reward_std": 0.08515419811010361,
"rewards/accuracy_reward": 0.586181640625,
"rewards/batch_coverage_0": 0.37667161226272583,
"rewards/batch_coverage_1": 0.37667161226272583,
"rewards/batch_coverage_10": 0.40571996569633484,
"rewards/batch_coverage_15": 0.4070962965488434,
"rewards/batch_coverage_20": 0.4126690626144409,
"rewards/batch_coverage_25": 0.41465383768081665,
"rewards/batch_coverage_5": 0.38757333159446716,
"rewards/brier_reward": 0.7919209599494934,
"rewards/confidence_uniqueness_reward": 0.9523754119873047,
"rewards/format_reward": 1.0,
"rewards/frontier_entropy_batch_reward": -0.218610942363739,
"signal/accuracy_reward/centered_abs_mean": 0.0902557373046875,
"signal/accuracy_reward/group_std_mean": 0.11484164744615555,
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8470200598239899,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04512786865234375,
"signal/advantage_abs_mean": 0.781099408864975,
"signal/advantage_pre_scale_abs_mean": 0.06732682883739471,
"signal/advantage_pre_scale_std": 0.10796621814370155,
"signal/advantage_std": 0.9826479554176331,
"signal/batch_coverage_0/centered_abs_mean": 0.14751210063695908,
"signal/batch_coverage_0/group_std_mean": 0.18181322515010834,
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03951258212327957,
"signal/batch_coverage_0/weight": 0.014299999922513962,
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021094229305163026,
"signal/batch_coverage_1/centered_abs_mean": 0.14751210063695908,
"signal/batch_coverage_1/group_std_mean": 0.18181322515010834,
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03951258212327957,
"signal/batch_coverage_1/weight": 0.014299999922513962,
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021094229305163026,
"signal/batch_coverage_10/centered_abs_mean": 0.1506556123495102,
"signal/batch_coverage_10/group_std_mean": 0.1876886636018753,
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04036974348127842,
"signal/batch_coverage_10/weight": 0.014299999922513962,
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0021543753100559115,
"signal/batch_coverage_15/centered_abs_mean": 0.1514410898089409,
"signal/batch_coverage_15/group_std_mean": 0.18875804543495178,
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.040581924840807915,
"signal/batch_coverage_15/weight": 0.014299999922513962,
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.0021656075259670615,
"signal/batch_coverage_20/centered_abs_mean": 0.1517779529094696,
"signal/batch_coverage_20/group_std_mean": 0.19017679244279861,
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04067254438996315,
"signal/batch_coverage_20/weight": 0.014299999922513962,
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.00217042479198426,
"signal/batch_coverage_25/centered_abs_mean": 0.14645803719758987,
"signal/batch_coverage_25/group_std_mean": 0.18481793254613876,
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03923363797366619,
"signal/batch_coverage_25/weight": 0.014299999922513962,
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0020943498238921165,
"signal/batch_coverage_5/centered_abs_mean": 0.15008513629436493,
"signal/batch_coverage_5/group_std_mean": 0.18554619699716568,
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04019570350646973,
"signal/batch_coverage_5/weight": 0.014299999922513962,
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002146217506378889,
"signal/brier_reward/centered_abs_mean": 0.10291799902915955,
"signal/brier_reward/group_std_mean": 0.13079295679926872,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19292094558477402,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010291799437254667,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012238562107086182,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015250732190907001,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022890268824994564,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012238562339916825,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2838418483734131,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3580092638731003,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5310377329587936,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028384186327457428,
"step": 312,
"total_flos": 0.0,
"train_loss": -0.01170685039258574,
"train_runtime": 64987.1739,
"train_samples_per_second": 0.308,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 312,
"num_input_tokens_seen": 1085956914,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}