Files
RLCR-v4-ks-uniqueness-cov0-…/trainer_state.json
ModelHub XC 3e6387e6d0 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-uniqueness-cov0-entropy100-noece-noaurc-scaletrue-cold-5x-math
Source: Original Platform
2026-04-25 02:06:07 +08:00

30771 lines
1.9 MiB

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.499193760077999,
"eval_steps": 50,
"global_step": 1040,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.5229855849935021,
"calibration/batch_distribution_entropy": 0.28178869591509825,
"calibration/confidence_entropy": 0.22202290431332766,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.47728606089979236,
"calibration/mean_confidence": 0.9145457949931112,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.019357638888888907,
"completions/max_length": 3991.8,
"completions/max_terminated_length": 3991.8,
"completions/mean_length": 516.1110229492188,
"completions/mean_terminated_length": 526.2912231445313,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.011999850001874977,
"grad_norm": 0.010887011885643005,
"learning_rate": 1.201923076923077e-07,
"loss": 0.0077,
"num_tokens": 9059807.0,
"reward": 0.4539343655109406,
"reward_std": 0.4154787003993988,
"rewards/accuracy_reward": 0.2559895753860474,
"rewards/brier_reward": 0.3074508547782898,
"rewards/confidence_uniqueness_reward": 0.285185831785202,
"rewards/format_reward": 0.5927951335906982,
"rewards/frontier_coverage_0": 0.2700425565242767,
"rewards/frontier_coverage_1": 0.2700425565242767,
"rewards/frontier_coverage_10": 0.2700425565242767,
"rewards/frontier_coverage_15": 0.2700425565242767,
"rewards/frontier_coverage_20": 0.2700425565242767,
"rewards/frontier_coverage_25": 0.2700425565242767,
"rewards/frontier_coverage_5": 0.2700425565242767,
"rewards/frontier_entropy_batch_reward": -0.5675290942192077,
"signal/accuracy_reward/centered_abs_mean": 0.3038682699203491,
"signal/accuracy_reward/group_std_mean": 0.3629651963710785,
"signal/accuracy_reward/group_zero_std_frac": 0.1027777798473835,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.3931264102458954,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15193413496017455,
"signal/advantage_abs_mean": 0.8550266146659851,
"signal/advantage_pre_scale_abs_mean": 0.35794793963432314,
"signal/advantage_pre_scale_std": 0.42069290280342103,
"signal/advantage_std": 0.9842098474502563,
"signal/brier_reward/centered_abs_mean": 0.31524649262428284,
"signal/brier_reward/group_std_mean": 0.368165111541748,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08156516402959824,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.031524648517370225,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.23518760204315187,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2871262729167938,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.060857976973056796,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02351876050233841,
"signal/format_reward/centered_abs_mean": 0.4417480409145355,
"signal/format_reward/group_std_mean": 0.4756620943546295,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.5716474652290344,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.22087402045726776,
"signal/frontier_coverage_0/centered_abs_mean": 0.3050322890281677,
"signal/frontier_coverage_0/group_std_mean": 0.36244935989379884,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.011285928264260291,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004361961875110865,
"signal/frontier_coverage_1/centered_abs_mean": 0.3050322890281677,
"signal/frontier_coverage_1/group_std_mean": 0.36244935989379884,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.011285928264260291,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004361961875110865,
"signal/frontier_coverage_10/centered_abs_mean": 0.3050322890281677,
"signal/frontier_coverage_10/group_std_mean": 0.36244935989379884,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.011285928264260291,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004361961875110865,
"signal/frontier_coverage_15/centered_abs_mean": 0.3050322890281677,
"signal/frontier_coverage_15/group_std_mean": 0.36244935989379884,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011285928264260291,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004361961875110865,
"signal/frontier_coverage_20/centered_abs_mean": 0.3050322890281677,
"signal/frontier_coverage_20/group_std_mean": 0.36244935989379884,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.011285928264260291,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004361961875110865,
"signal/frontier_coverage_25/centered_abs_mean": 0.3050322890281677,
"signal/frontier_coverage_25/group_std_mean": 0.36244935989379884,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.011285928264260291,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004361961875110865,
"signal/frontier_coverage_5/centered_abs_mean": 0.3050322890281677,
"signal/frontier_coverage_5/group_std_mean": 0.36244935989379884,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.011285928264260291,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004361961875110865,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.45104606747627257,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4828143179416656,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.11673455983400345,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.045104607939720154,
"step": 5
},
{
"calibration/aurc": 0.4756377973542508,
"calibration/batch_distribution_entropy": 0.25095197717333295,
"calibration/confidence_entropy": 0.22121285056009152,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.43047562493454417,
"calibration/mean_confidence": 0.9208003461593043,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.018576388888888885,
"completions/max_length": 3964.6,
"completions/max_terminated_length": 3964.6,
"completions/mean_length": 506.35322265625,
"completions/mean_terminated_length": 515.9827209472656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.8,
"epoch": 0.023999700003749954,
"grad_norm": 0.011354845017194748,
"learning_rate": 2.403846153846154e-07,
"loss": 0.0094,
"num_tokens": 17975716.0,
"reward": 0.4697346866130829,
"reward_std": 0.4147303819656372,
"rewards/accuracy_reward": 0.26328125,
"rewards/brier_reward": 0.31500027775764466,
"rewards/confidence_uniqueness_reward": 0.301408588886261,
"rewards/format_reward": 0.6149305582046509,
"rewards/frontier_coverage_0": 0.27632899284362794,
"rewards/frontier_coverage_1": 0.27632899284362794,
"rewards/frontier_coverage_10": 0.27632899284362794,
"rewards/frontier_coverage_15": 0.27632899284362794,
"rewards/frontier_coverage_20": 0.27632899284362794,
"rewards/frontier_coverage_25": 0.27632899284362794,
"rewards/frontier_coverage_5": 0.27632899284362794,
"rewards/frontier_entropy_batch_reward": -0.5867263197898864,
"signal/accuracy_reward/centered_abs_mean": 0.31220160722732543,
"signal/accuracy_reward/group_std_mean": 0.3712098479270935,
"signal/accuracy_reward/group_zero_std_frac": 0.09166666865348816,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.40598496198654177,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15610080361366271,
"signal/advantage_abs_mean": 0.8426825761795044,
"signal/advantage_pre_scale_abs_mean": 0.35328022241592405,
"signal/advantage_pre_scale_std": 0.41959584355354307,
"signal/advantage_std": 0.9842082262039185,
"signal/brier_reward/centered_abs_mean": 0.3161942720413208,
"signal/brier_reward/group_std_mean": 0.3705894351005554,
"signal/brier_reward/group_zero_std_frac": 0.002777777798473835,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0822685867547989,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.031619428843259814,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.23270548582077027,
"signal/confidence_uniqueness_reward/group_std_mean": 0.28428863286972045,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06063591316342354,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02327054850757122,
"signal/format_reward/centered_abs_mean": 0.4267469644546509,
"signal/format_reward/group_std_mean": 0.46675443053245547,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.5556710243225098,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.21337348222732544,
"signal/frontier_coverage_0/centered_abs_mean": 0.30924100875854493,
"signal/frontier_coverage_0/group_std_mean": 0.3677679717540741,
"signal/frontier_coverage_0/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01150359958410263,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004422146081924439,
"signal/frontier_coverage_1/centered_abs_mean": 0.30924100875854493,
"signal/frontier_coverage_1/group_std_mean": 0.3677679717540741,
"signal/frontier_coverage_1/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01150359958410263,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004422146081924439,
"signal/frontier_coverage_10/centered_abs_mean": 0.30924100875854493,
"signal/frontier_coverage_10/group_std_mean": 0.3677679717540741,
"signal/frontier_coverage_10/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01150359958410263,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004422146081924439,
"signal/frontier_coverage_15/centered_abs_mean": 0.30924100875854493,
"signal/frontier_coverage_15/group_std_mean": 0.3677679717540741,
"signal/frontier_coverage_15/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01150359958410263,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004422146081924439,
"signal/frontier_coverage_20/centered_abs_mean": 0.30924100875854493,
"signal/frontier_coverage_20/group_std_mean": 0.3677679717540741,
"signal/frontier_coverage_20/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01150359958410263,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004422146081924439,
"signal/frontier_coverage_25/centered_abs_mean": 0.30924100875854493,
"signal/frontier_coverage_25/group_std_mean": 0.3677679717540741,
"signal/frontier_coverage_25/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01150359958410263,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004422146081924439,
"signal/frontier_coverage_5/centered_abs_mean": 0.30924100875854493,
"signal/frontier_coverage_5/group_std_mean": 0.3677679717540741,
"signal/frontier_coverage_5/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01150359958410263,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004422146081924439,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.43938422203063965,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4763322174549103,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.11442900747060776,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.043938422203063966,
"step": 10
},
{
"calibration/aurc": 0.5024067621409587,
"calibration/batch_distribution_entropy": 0.24801621219288422,
"calibration/confidence_entropy": 0.2106205381039305,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4819867701410224,
"calibration/mean_confidence": 0.9234321037382752,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.019791666666666673,
"completions/max_length": 4029.0,
"completions/max_terminated_length": 4029.0,
"completions/mean_length": 504.4060791015625,
"completions/mean_terminated_length": 514.6802734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.03599955000562493,
"grad_norm": 0.007030100096017122,
"learning_rate": 3.6057692307692306e-07,
"loss": 0.0069,
"num_tokens": 26888458.0,
"reward": 0.46561177968978884,
"reward_std": 0.3956846117973328,
"rewards/accuracy_reward": 0.23828125,
"rewards/brier_reward": 0.299567723274231,
"rewards/confidence_uniqueness_reward": 0.3241762280464172,
"rewards/format_reward": 0.6395833253860473,
"rewards/frontier_coverage_0": 0.25464145839214325,
"rewards/frontier_coverage_1": 0.25464145839214325,
"rewards/frontier_coverage_10": 0.25464145839214325,
"rewards/frontier_coverage_15": 0.25464145839214325,
"rewards/frontier_coverage_20": 0.25464145839214325,
"rewards/frontier_coverage_25": 0.25464145839214325,
"rewards/frontier_coverage_5": 0.25464145839214325,
"rewards/frontier_entropy_batch_reward": -0.6118452191352844,
"signal/accuracy_reward/centered_abs_mean": 0.2900553375482559,
"signal/accuracy_reward/group_std_mean": 0.35240110754966736,
"signal/accuracy_reward/group_zero_std_frac": 0.10833333507180214,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.3923906862735748,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14502766877412795,
"signal/advantage_abs_mean": 0.8354665875434876,
"signal/advantage_pre_scale_abs_mean": 0.33352160453796387,
"signal/advantage_pre_scale_std": 0.4006196856498718,
"signal/advantage_std": 0.9841970324516296,
"signal/brier_reward/centered_abs_mean": 0.30055932998657225,
"signal/brier_reward/group_std_mean": 0.3555553019046783,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08140757381916046,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.030055934190750123,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2306578904390335,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2857406497001648,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0626334622502327,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02306578904390335,
"signal/format_reward/centered_abs_mean": 0.4120876729488373,
"signal/format_reward/group_std_mean": 0.45783867239952086,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.5594466686248779,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.20604383647441865,
"signal/frontier_coverage_0/centered_abs_mean": 0.29105416536331175,
"signal/frontier_coverage_0/group_std_mean": 0.35061998963356017,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0112642303109169,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0041620745323598385,
"signal/frontier_coverage_1/centered_abs_mean": 0.29105416536331175,
"signal/frontier_coverage_1/group_std_mean": 0.35061998963356017,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0112642303109169,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0041620745323598385,
"signal/frontier_coverage_10/centered_abs_mean": 0.29105416536331175,
"signal/frontier_coverage_10/group_std_mean": 0.35061998963356017,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0112642303109169,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0041620745323598385,
"signal/frontier_coverage_15/centered_abs_mean": 0.29105416536331175,
"signal/frontier_coverage_15/group_std_mean": 0.35061998963356017,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0112642303109169,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0041620745323598385,
"signal/frontier_coverage_20/centered_abs_mean": 0.29105416536331175,
"signal/frontier_coverage_20/group_std_mean": 0.35061998963356017,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0112642303109169,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0041620745323598385,
"signal/frontier_coverage_25/centered_abs_mean": 0.29105416536331175,
"signal/frontier_coverage_25/group_std_mean": 0.35061998963356017,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0112642303109169,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0041620745323598385,
"signal/frontier_coverage_5/centered_abs_mean": 0.29105416536331175,
"signal/frontier_coverage_5/group_std_mean": 0.35061998963356017,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0112642303109169,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0041620745323598385,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.42714359760284426,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.46969146728515626,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.11596711426973343,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.042714360356330874,
"step": 15
},
{
"calibration/aurc": 0.5694586542786206,
"calibration/batch_distribution_entropy": 0.2448034104520702,
"calibration/confidence_entropy": 0.22888620321297623,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5197183535816638,
"calibration/mean_confidence": 0.9203614575376587,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01918402777777779,
"completions/max_length": 3979.0,
"completions/max_terminated_length": 3979.0,
"completions/mean_length": 462.74097900390626,
"completions/mean_terminated_length": 471.86796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.2,
"epoch": 0.04799940000749991,
"grad_norm": 0.02198331244289875,
"learning_rate": 4.807692307692308e-07,
"loss": 0.0011,
"num_tokens": 35332930.0,
"reward": 0.5601749300956727,
"reward_std": 0.3667020261287689,
"rewards/accuracy_reward": 0.2842013895511627,
"rewards/brier_reward": 0.3595377504825592,
"rewards/confidence_uniqueness_reward": 0.3906712234020233,
"rewards/format_reward": 0.7723958373069764,
"rewards/frontier_coverage_0": 0.3041978418827057,
"rewards/frontier_coverage_1": 0.3041978418827057,
"rewards/frontier_coverage_10": 0.3041978418827057,
"rewards/frontier_coverage_15": 0.3041978418827057,
"rewards/frontier_coverage_20": 0.3041978418827057,
"rewards/frontier_coverage_25": 0.3041978418827057,
"rewards/frontier_coverage_5": 0.3041978418827057,
"rewards/frontier_entropy_batch_reward": -0.7359476447105407,
"signal/accuracy_reward/centered_abs_mean": 0.30268012285232543,
"signal/accuracy_reward/group_std_mean": 0.3640410006046295,
"signal/accuracy_reward/group_zero_std_frac": 0.09444444626569748,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.4493618309497833,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15134006142616271,
"signal/advantage_abs_mean": 0.7916141271591186,
"signal/advantage_pre_scale_abs_mean": 0.29761979579925535,
"signal/advantage_pre_scale_std": 0.3721330463886261,
"signal/advantage_std": 0.984171736240387,
"signal/brier_reward/centered_abs_mean": 0.3021188259124756,
"signal/brier_reward/group_std_mean": 0.3569773733615875,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08976634591817856,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.030211882293224336,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.21197229325771333,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2709016382694244,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0631466455757618,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.021197229623794556,
"signal/format_reward/centered_abs_mean": 0.310796445608139,
"signal/format_reward/group_std_mean": 0.39093394875526427,
"signal/format_reward/group_zero_std_frac": 0.002777777798473835,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.4619352400302887,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.1553982228040695,
"signal/frontier_coverage_0/centered_abs_mean": 0.3003900110721588,
"signal/frontier_coverage_0/group_std_mean": 0.3597902595996857,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.012757665291428567,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004295577295124531,
"signal/frontier_coverage_1/centered_abs_mean": 0.3003900110721588,
"signal/frontier_coverage_1/group_std_mean": 0.3597902595996857,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.012757665291428567,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004295577295124531,
"signal/frontier_coverage_10/centered_abs_mean": 0.3003900110721588,
"signal/frontier_coverage_10/group_std_mean": 0.3597902595996857,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012757665291428567,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004295577295124531,
"signal/frontier_coverage_15/centered_abs_mean": 0.3003900110721588,
"signal/frontier_coverage_15/group_std_mean": 0.3597902595996857,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012757665291428567,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004295577295124531,
"signal/frontier_coverage_20/centered_abs_mean": 0.3003900110721588,
"signal/frontier_coverage_20/group_std_mean": 0.3597902595996857,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012757665291428567,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004295577295124531,
"signal/frontier_coverage_25/centered_abs_mean": 0.3003900110721588,
"signal/frontier_coverage_25/group_std_mean": 0.3597902595996857,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.012757665291428567,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004295577295124531,
"signal/frontier_coverage_5/centered_abs_mean": 0.3003900110721588,
"signal/frontier_coverage_5/group_std_mean": 0.3597902595996857,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.012757665291428567,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004295577295124531,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3481315076351166,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.42123820185661315,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.10343978106975556,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0348131500184536,
"step": 20
},
{
"calibration/aurc": 0.5489235090143648,
"calibration/batch_distribution_entropy": 0.2749327391476236,
"calibration/buffer_distribution_entropy": 0.2693125108855333,
"calibration/confidence_entropy": 0.2360269107840506,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5054070723684616,
"calibration/mean_confidence": 0.91632770676523,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0109375,
"completions/max_length": 3966.0,
"completions/max_terminated_length": 3966.0,
"completions/mean_length": 412.9024353027344,
"completions/mean_terminated_length": 417.5066162109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 33.2,
"epoch": 0.05999925000937488,
"grad_norm": 0.010560334660112858,
"learning_rate": 6.009615384615385e-07,
"loss": -0.0219,
"num_tokens": 43214014.0,
"reward": 0.621948528289795,
"reward_std": 0.2870722770690918,
"rewards/accuracy_reward": 0.3277777791023254,
"rewards/brier_reward": 0.4194044291973114,
"rewards/confidence_uniqueness_reward": 0.47655481696128843,
"rewards/format_reward": 0.907031238079071,
"rewards/frontier_coverage_0": 0.015070206206291913,
"rewards/frontier_coverage_1": 0.015070206206291913,
"rewards/frontier_coverage_10": 0.015070206206291913,
"rewards/frontier_coverage_15": 0.015070206206291913,
"rewards/frontier_coverage_20": 0.015070206206291913,
"rewards/frontier_coverage_25": 0.015070206206291913,
"rewards/frontier_coverage_5": 0.015070206206291913,
"rewards/frontier_entropy_batch_reward": -0.8656046628952027,
"signal/accuracy_reward/centered_abs_mean": 0.32026910185813906,
"signal/accuracy_reward/group_std_mean": 0.3814137876033783,
"signal/accuracy_reward/group_zero_std_frac": 0.06666666865348816,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6105441927909852,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.16013455092906953,
"signal/advantage_abs_mean": 0.7641073822975158,
"signal/advantage_pre_scale_abs_mean": 0.22727133631706237,
"signal/advantage_pre_scale_std": 0.29072420597076415,
"signal/advantage_std": 0.9840883612632751,
"signal/brier_reward/centered_abs_mean": 0.3018735468387604,
"signal/brier_reward/group_std_mean": 0.3560627937316895,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.11514985263347625,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.030187354236841202,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.184186252951622,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2384261429309845,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0701954871416092,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.018418625369668006,
"signal/format_reward/centered_abs_mean": 0.152099609375,
"signal/format_reward/group_std_mean": 0.24653084874153136,
"signal/format_reward/group_zero_std_frac": 0.1444444440305233,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2882718056440353,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0760498046875,
"signal/frontier_coverage_0/centered_abs_mean": 0.025607530772686005,
"signal/frontier_coverage_0/group_std_mean": 0.04395473003387451,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013895100564695895,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0003661876980913803,
"signal/frontier_coverage_1/centered_abs_mean": 0.025607530772686005,
"signal/frontier_coverage_1/group_std_mean": 0.04395473003387451,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013895100564695895,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0003661876980913803,
"signal/frontier_coverage_10/centered_abs_mean": 0.025607530772686005,
"signal/frontier_coverage_10/group_std_mean": 0.04395473003387451,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013895100564695895,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0003661876980913803,
"signal/frontier_coverage_15/centered_abs_mean": 0.025607530772686005,
"signal/frontier_coverage_15/group_std_mean": 0.04395473003387451,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013895100564695895,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0003661876980913803,
"signal/frontier_coverage_20/centered_abs_mean": 0.025607530772686005,
"signal/frontier_coverage_20/group_std_mean": 0.04395473003387451,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013895100564695895,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0003661876980913803,
"signal/frontier_coverage_25/centered_abs_mean": 0.025607530772686005,
"signal/frontier_coverage_25/group_std_mean": 0.04395473003387451,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013895100564695895,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0003661876980913803,
"signal/frontier_coverage_5/centered_abs_mean": 0.025607530772686005,
"signal/frontier_coverage_5/group_std_mean": 0.04395473003387451,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013895100564695895,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0003661876980913803,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21398890316486358,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3219181656837463,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.03611111231148243,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.08149619698524475,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021398890390992166,
"step": 25
},
{
"calibration/aurc": 0.5382794629498553,
"calibration/batch_distribution_entropy": 0.309108020546252,
"calibration/buffer_distribution_entropy": 0.27122874768365013,
"calibration/confidence_entropy": 0.25586708450642537,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4759124424924992,
"calibration/mean_confidence": 0.9054469712207339,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010416666666666675,
"completions/max_length": 3831.0,
"completions/max_terminated_length": 3831.0,
"completions/mean_length": 410.4355041503906,
"completions/mean_terminated_length": 414.7814697265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 60.4,
"epoch": 0.07199910001124986,
"grad_norm": 0.01928151771426201,
"learning_rate": 7.211538461538461e-07,
"loss": -0.0436,
"num_tokens": 51052151.0,
"reward": 0.6726618885993958,
"reward_std": 0.25515561997890474,
"rewards/accuracy_reward": 0.36223958134651185,
"rewards/brier_reward": 0.4675338685512543,
"rewards/confidence_uniqueness_reward": 0.5357791066169739,
"rewards/format_reward": 0.9634548544883728,
"rewards/frontier_coverage_0": 0.009524069260805845,
"rewards/frontier_coverage_1": 0.009524069260805845,
"rewards/frontier_coverage_10": 0.009524069260805845,
"rewards/frontier_coverage_15": 0.009524069260805845,
"rewards/frontier_coverage_20": 0.009524069260805845,
"rewards/frontier_coverage_25": 0.009524069260805845,
"rewards/frontier_coverage_5": 0.009524069260805845,
"rewards/frontier_entropy_batch_reward": -0.9146998524665833,
"signal/accuracy_reward/centered_abs_mean": 0.32538520097732543,
"signal/accuracy_reward/group_std_mean": 0.3815238237380981,
"signal/accuracy_reward/group_zero_std_frac": 0.08611111417412758,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7917419075965881,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.16269260048866271,
"signal/advantage_abs_mean": 0.7773361206054688,
"signal/advantage_pre_scale_abs_mean": 0.20761180222034453,
"signal/advantage_pre_scale_std": 0.26097519099712374,
"signal/advantage_std": 0.9839826345443725,
"signal/brier_reward/centered_abs_mean": 0.29560062289237976,
"signal/brier_reward/group_std_mean": 0.34639087319374084,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.14382269978523254,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.029560060799121858,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.16958691775798798,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2160712420940399,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08290428072214126,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01695869229733944,
"signal/format_reward/centered_abs_mean": 0.06507703959941864,
"signal/format_reward/group_std_mean": 0.12960017919540406,
"signal/format_reward/group_zero_std_frac": 0.45000000596046447,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.15022960901260377,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.03253851979970932,
"signal/frontier_coverage_0/centered_abs_mean": 0.021855851635336876,
"signal/frontier_coverage_0/group_std_mean": 0.040756043046712875,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015435649547725916,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0003125386836472899,
"signal/frontier_coverage_1/centered_abs_mean": 0.021855851635336876,
"signal/frontier_coverage_1/group_std_mean": 0.040756043046712875,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015435649547725916,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0003125386836472899,
"signal/frontier_coverage_10/centered_abs_mean": 0.021855851635336876,
"signal/frontier_coverage_10/group_std_mean": 0.040756043046712875,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015435649547725916,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0003125386836472899,
"signal/frontier_coverage_15/centered_abs_mean": 0.021855851635336876,
"signal/frontier_coverage_15/group_std_mean": 0.040756043046712875,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015435649547725916,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0003125386836472899,
"signal/frontier_coverage_20/centered_abs_mean": 0.021855851635336876,
"signal/frontier_coverage_20/group_std_mean": 0.040756043046712875,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015435649547725916,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0003125386836472899,
"signal/frontier_coverage_25/centered_abs_mean": 0.021855851635336876,
"signal/frontier_coverage_25/group_std_mean": 0.040756043046712875,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015435649547725916,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0003125386836472899,
"signal/frontier_coverage_5/centered_abs_mean": 0.021855851635336876,
"signal/frontier_coverage_5/group_std_mean": 0.040756043046712875,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015435649547725916,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0003125386836472899,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.146951425075531,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.25599651634693144,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1250000014901161,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.07040144726634026,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014695142395794392,
"step": 30
},
{
"calibration/aurc": 0.47850826375168937,
"calibration/batch_distribution_entropy": 0.3793815586454916,
"calibration/buffer_distribution_entropy": 0.2861277383673485,
"calibration/confidence_entropy": 0.29658167577248173,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4002400196916637,
"calibration/mean_confidence": 0.8884149740450364,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010416666666666675,
"completions/max_length": 3734.8,
"completions/max_terminated_length": 3734.8,
"completions/mean_length": 450.95677490234374,
"completions/mean_terminated_length": 455.6978393554688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 89.6,
"epoch": 0.08399895001312484,
"grad_norm": 0.004714103415608406,
"learning_rate": 8.41346153846154e-07,
"loss": -0.0272,
"num_tokens": 59324613.0,
"reward": 0.7333313584327698,
"reward_std": 0.23513826131820678,
"rewards/accuracy_reward": 0.4403645873069763,
"rewards/brier_reward": 0.5525379419326782,
"rewards/confidence_uniqueness_reward": 0.590362799167633,
"rewards/format_reward": 0.983506953716278,
"rewards/frontier_coverage_0": 0.008734829723834991,
"rewards/frontier_coverage_1": 0.008734829723834991,
"rewards/frontier_coverage_10": 0.008734829723834991,
"rewards/frontier_coverage_15": 0.008734829723834991,
"rewards/frontier_coverage_20": 0.008734829723834991,
"rewards/frontier_coverage_25": 0.008734829723834991,
"rewards/frontier_coverage_5": 0.008734829723834991,
"rewards/frontier_entropy_batch_reward": -0.9376886129379273,
"signal/accuracy_reward/centered_abs_mean": 0.31514214277267455,
"signal/accuracy_reward/group_std_mean": 0.3757008254528046,
"signal/accuracy_reward/group_zero_std_frac": 0.07777777872979641,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8547364711761475,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15757107138633727,
"signal/advantage_abs_mean": 0.777402913570404,
"signal/advantage_pre_scale_abs_mean": 0.19204484224319457,
"signal/advantage_pre_scale_std": 0.24264540672302246,
"signal/advantage_std": 0.9839303016662597,
"signal/brier_reward/centered_abs_mean": 0.2717652380466461,
"signal/brier_reward/group_std_mean": 0.3256272315979004,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.14762357771396636,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.027176523208618165,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.17783839106559754,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2096118301153183,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0962824359536171,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.017783838883042337,
"signal/format_reward/centered_abs_mean": 0.0297960065305233,
"signal/format_reward/group_std_mean": 0.06317889839410781,
"signal/format_reward/group_zero_std_frac": 0.7166666626930237,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08153303265571595,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01489800326526165,
"signal/frontier_coverage_0/centered_abs_mean": 0.02503722868859768,
"signal/frontier_coverage_0/group_std_mean": 0.04525566101074219,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019457651767879724,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00035803236532956363,
"signal/frontier_coverage_1/centered_abs_mean": 0.02503722868859768,
"signal/frontier_coverage_1/group_std_mean": 0.04525566101074219,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019457651767879724,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00035803236532956363,
"signal/frontier_coverage_10/centered_abs_mean": 0.02503722868859768,
"signal/frontier_coverage_10/group_std_mean": 0.04525566101074219,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019457651767879724,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00035803236532956363,
"signal/frontier_coverage_15/centered_abs_mean": 0.02503722868859768,
"signal/frontier_coverage_15/group_std_mean": 0.04525566101074219,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019457651767879724,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00035803236532956363,
"signal/frontier_coverage_20/centered_abs_mean": 0.02503722868859768,
"signal/frontier_coverage_20/group_std_mean": 0.04525566101074219,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019457651767879724,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00035803236532956363,
"signal/frontier_coverage_25/centered_abs_mean": 0.02503722868859768,
"signal/frontier_coverage_25/group_std_mean": 0.04525566101074219,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019457651767879724,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00035803236532956363,
"signal/frontier_coverage_5/centered_abs_mean": 0.02503722868859768,
"signal/frontier_coverage_5/group_std_mean": 0.04525566101074219,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019457651767879724,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00035803236532956363,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.11057607978582382,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.21464248597621918,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.23055555820465087,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.060289456695318225,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011057608388364315,
"step": 35
},
{
"calibration/aurc": 0.4402947585970144,
"calibration/batch_distribution_entropy": 0.4824331907575866,
"calibration/buffer_distribution_entropy": 0.31577170793548615,
"calibration/confidence_entropy": 0.34483839347598094,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.010026385224274407,
"calibration/coverage@20%": 0.013192612137203167,
"calibration/coverage@25%": 0.01424802110817942,
"calibration/coverage@30%": 0.01424802110817942,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.33865251464556356,
"calibration/mean_confidence": 0.8666601711603379,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009114583333333348,
"completions/max_length": 3966.0,
"completions/max_terminated_length": 3966.0,
"completions/mean_length": 480.8202331542969,
"completions/mean_terminated_length": 485.25437622070314,
"completions/min_length": 0.0,
"completions/min_terminated_length": 87.0,
"epoch": 0.09599880001499982,
"grad_norm": 0.0031753634102642536,
"learning_rate": 9.615384615384617e-07,
"loss": -0.0197,
"num_tokens": 67983182.0,
"reward": 0.7822724223136902,
"reward_std": 0.21780899465084075,
"rewards/accuracy_reward": 0.5059895753860474,
"rewards/brier_reward": 0.6228862762451172,
"rewards/confidence_uniqueness_reward": 0.6643950819969178,
"rewards/format_reward": 0.9869791507720947,
"rewards/frontier_coverage_0": 0.007964784186333418,
"rewards/frontier_coverage_1": 0.007964784186333418,
"rewards/frontier_coverage_10": 0.007964784186333418,
"rewards/frontier_coverage_15": 0.007964784186333418,
"rewards/frontier_coverage_20": 0.007964784186333418,
"rewards/frontier_coverage_25": 0.007964784186333418,
"rewards/frontier_coverage_5": 0.007964784186333418,
"rewards/frontier_entropy_batch_reward": -0.9373735666275025,
"signal/accuracy_reward/centered_abs_mean": 0.28925238847732543,
"signal/accuracy_reward/group_std_mean": 0.3537147223949432,
"signal/accuracy_reward/group_zero_std_frac": 0.10000000167638064,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0984381198883058,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14462619423866271,
"signal/advantage_abs_mean": 0.7445023059844971,
"signal/advantage_pre_scale_abs_mean": 0.1732119858264923,
"signal/advantage_pre_scale_std": 0.22844835221767426,
"signal/advantage_std": 0.9837140202522278,
"signal/brier_reward/centered_abs_mean": 0.2372313529253006,
"signal/brier_reward/group_std_mean": 0.2905817449092865,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18049295842647553,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02372313551604748,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.14429336190223693,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1727246791124344,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.11001295447349549,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014429337345063686,
"signal/format_reward/centered_abs_mean": 0.02393663190305233,
"signal/format_reward/group_std_mean": 0.05173143371939659,
"signal/format_reward/group_zero_std_frac": 0.7666666746139527,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09135463684797288,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011968315951526165,
"signal/frontier_coverage_0/centered_abs_mean": 0.032622770965099336,
"signal/frontier_coverage_0/group_std_mean": 0.05645905360579491,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0035575965885072947,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00046650563017465175,
"signal/frontier_coverage_1/centered_abs_mean": 0.032622770965099336,
"signal/frontier_coverage_1/group_std_mean": 0.05645905360579491,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035575965885072947,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00046650563017465175,
"signal/frontier_coverage_10/centered_abs_mean": 0.032622770965099336,
"signal/frontier_coverage_10/group_std_mean": 0.05645905360579491,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035575965885072947,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00046650563017465175,
"signal/frontier_coverage_15/centered_abs_mean": 0.032622770965099336,
"signal/frontier_coverage_15/group_std_mean": 0.05645905360579491,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035575965885072947,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00046650563017465175,
"signal/frontier_coverage_20/centered_abs_mean": 0.032622770965099336,
"signal/frontier_coverage_20/group_std_mean": 0.05645905360579491,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035575965885072947,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00046650563017465175,
"signal/frontier_coverage_25/centered_abs_mean": 0.032622770965099336,
"signal/frontier_coverage_25/group_std_mean": 0.05645905360579491,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035575965885072947,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00046650563017465175,
"signal/frontier_coverage_5/centered_abs_mean": 0.032622770965099336,
"signal/frontier_coverage_5/group_std_mean": 0.05645905360579491,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035575965885072947,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00046650563017465175,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.11271507740020752,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.22643994092941283,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.20555555522441865,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0858034148812294,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011271507106721401,
"step": 40
},
{
"calibration/aurc": 0.3377478184440539,
"calibration/batch_distribution_entropy": 0.5848481948438579,
"calibration/buffer_distribution_entropy": 0.35423291152038805,
"calibration/confidence_entropy": 0.384831962509367,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.013829787234042554,
"calibration/coverage@25%": 0.18736678360238054,
"calibration/coverage@30%": 0.4657049563747111,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.22727172809322985,
"calibration/mean_confidence": 0.8391492923935034,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009375,
"completions/max_length": 4016.0,
"completions/max_terminated_length": 4016.0,
"completions/mean_length": 543.1631042480469,
"completions/mean_terminated_length": 548.3945129394531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 111.6,
"epoch": 0.1079986500168748,
"grad_norm": 0.01239377073943615,
"learning_rate": 1.0817307692307693e-06,
"loss": -0.0188,
"num_tokens": 77375685.0,
"reward": 0.813059675693512,
"reward_std": 0.2060342937707901,
"rewards/accuracy_reward": 0.5426215350627899,
"rewards/brier_reward": 0.6699079275131226,
"rewards/confidence_uniqueness_reward": 0.7288915514945984,
"rewards/format_reward": 0.9876736164093017,
"rewards/frontier_coverage_0": 0.0066648813604842875,
"rewards/frontier_coverage_1": 0.0066648813604842875,
"rewards/frontier_coverage_10": 0.0066648813604842875,
"rewards/frontier_coverage_15": 0.0066648813604842875,
"rewards/frontier_coverage_20": 0.0066648813604842875,
"rewards/frontier_coverage_25": 0.0066648813604842875,
"rewards/frontier_coverage_5": 0.0066648813604842875,
"rewards/frontier_entropy_batch_reward": -0.9263499140739441,
"signal/accuracy_reward/centered_abs_mean": 0.2699490010738373,
"signal/accuracy_reward/group_std_mean": 0.3382055342197418,
"signal/accuracy_reward/group_zero_std_frac": 0.11111111342906951,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.247203779220581,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.13497450053691865,
"signal/advantage_abs_mean": 0.7202765941619873,
"signal/advantage_pre_scale_abs_mean": 0.15997426211833954,
"signal/advantage_pre_scale_std": 0.21895503699779512,
"signal/advantage_std": 0.9835591673851013,
"signal/brier_reward/centered_abs_mean": 0.20789836943149567,
"signal/brier_reward/group_std_mean": 0.26231162548065184,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19199275672435762,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.020789837837219237,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.10277863144874573,
"signal/confidence_uniqueness_reward/group_std_mean": 0.13384136855602263,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0936692550778389,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010277863219380378,
"signal/format_reward/centered_abs_mean": 0.02222222238779068,
"signal/format_reward/group_std_mean": 0.046404258161783216,
"signal/format_reward/group_zero_std_frac": 0.794444453716278,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10093065053224563,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01111111119389534,
"signal/frontier_coverage_0/centered_abs_mean": 0.04338513538241386,
"signal/frontier_coverage_0/group_std_mean": 0.06866179034113884,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00571465864777565,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0006204074248671532,
"signal/frontier_coverage_1/centered_abs_mean": 0.04338513538241386,
"signal/frontier_coverage_1/group_std_mean": 0.06866179034113884,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00571465864777565,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0006204074248671532,
"signal/frontier_coverage_10/centered_abs_mean": 0.04338513538241386,
"signal/frontier_coverage_10/group_std_mean": 0.06866179034113884,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00571465864777565,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0006204074248671532,
"signal/frontier_coverage_15/centered_abs_mean": 0.04338513538241386,
"signal/frontier_coverage_15/group_std_mean": 0.06866179034113884,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00571465864777565,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0006204074248671532,
"signal/frontier_coverage_20/centered_abs_mean": 0.04338513538241386,
"signal/frontier_coverage_20/group_std_mean": 0.06866179034113884,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00571465864777565,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006204074248671532,
"signal/frontier_coverage_25/centered_abs_mean": 0.04338513538241386,
"signal/frontier_coverage_25/group_std_mean": 0.06866179034113884,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00571465864777565,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006204074248671532,
"signal/frontier_coverage_5/centered_abs_mean": 0.04338513538241386,
"signal/frontier_coverage_5/group_std_mean": 0.06866179034113884,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00571465864777565,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0006204074248671532,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1316349670290947,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.25327491760253906,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.16388889029622078,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.12047145068645478,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013163497298955917,
"step": 45
},
{
"calibration/aurc": 0.44876515017592206,
"calibration/batch_distribution_entropy": 0.6788178143136975,
"calibration/buffer_distribution_entropy": 0.40940430199471384,
"calibration/confidence_entropy": 0.44995376994049635,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.035263157894736843,
"calibration/coverage@25%": 0.05526315789473685,
"calibration/coverage@30%": 0.06649310441880102,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.286977385931817,
"calibration/mean_confidence": 0.7954010856289189,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01493055555555558,
"completions/max_length": 3769.0,
"completions/max_terminated_length": 3769.0,
"completions/mean_length": 586.178466796875,
"completions/mean_terminated_length": 595.0872192382812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 129.4,
"epoch": 0.11999850001874976,
"grad_norm": 0.0035637153778225183,
"learning_rate": 1.201923076923077e-06,
"loss": -0.029,
"num_tokens": 87226061.0,
"reward": 0.838405168056488,
"reward_std": 0.19529550671577453,
"rewards/accuracy_reward": 0.578906261920929,
"rewards/brier_reward": 0.7015391111373901,
"rewards/confidence_uniqueness_reward": 0.7591113924980164,
"rewards/format_reward": 0.9816840291023254,
"rewards/frontier_coverage_0": -0.0034364996245130897,
"rewards/frontier_coverage_1": -0.0034364996245130897,
"rewards/frontier_coverage_10": -0.0034364996245130897,
"rewards/frontier_coverage_15": -0.0034364996245130897,
"rewards/frontier_coverage_20": -0.0034364996245130897,
"rewards/frontier_coverage_25": -0.0034364996245130897,
"rewards/frontier_coverage_5": -0.0034364996245130897,
"rewards/frontier_entropy_batch_reward": -0.8761105418205262,
"signal/accuracy_reward/centered_abs_mean": 0.23926323652267456,
"signal/accuracy_reward/group_std_mean": 0.3040441036224365,
"signal/accuracy_reward/group_zero_std_frac": 0.17777777910232545,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0301209807395935,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11963161826133728,
"signal/advantage_abs_mean": 0.7018255352973938,
"signal/advantage_pre_scale_abs_mean": 0.14790762662887574,
"signal/advantage_pre_scale_std": 0.2124355673789978,
"signal/advantage_std": 0.98361257314682,
"signal/brier_reward/centered_abs_mean": 0.1796101748943329,
"signal/brier_reward/group_std_mean": 0.22917198836803437,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15517139434814453,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01796101815998554,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.11911777704954148,
"signal/confidence_uniqueness_reward/group_std_mean": 0.14986335337162018,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.1029736876487732,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011911777965724468,
"signal/format_reward/centered_abs_mean": 0.03136393222957849,
"signal/format_reward/group_std_mean": 0.06070434525609016,
"signal/format_reward/group_zero_std_frac": 0.7444444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.13335922509431838,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.015681966114789246,
"signal/frontier_coverage_0/centered_abs_mean": 0.05662049055099487,
"signal/frontier_coverage_0/group_std_mean": 0.0813647210597992,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0069804366677999495,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0008096729870885611,
"signal/frontier_coverage_1/centered_abs_mean": 0.05662049055099487,
"signal/frontier_coverage_1/group_std_mean": 0.0813647210597992,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0069804366677999495,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0008096729870885611,
"signal/frontier_coverage_10/centered_abs_mean": 0.05662049055099487,
"signal/frontier_coverage_10/group_std_mean": 0.0813647210597992,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0069804366677999495,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008096729870885611,
"signal/frontier_coverage_15/centered_abs_mean": 0.05662049055099487,
"signal/frontier_coverage_15/group_std_mean": 0.0813647210597992,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0069804366677999495,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008096729870885611,
"signal/frontier_coverage_20/centered_abs_mean": 0.05662049055099487,
"signal/frontier_coverage_20/group_std_mean": 0.0813647210597992,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0069804366677999495,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008096729870885611,
"signal/frontier_coverage_25/centered_abs_mean": 0.05662049055099487,
"signal/frontier_coverage_25/group_std_mean": 0.0813647210597992,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0069804366677999495,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008096729870885611,
"signal/frontier_coverage_5/centered_abs_mean": 0.05662049055099487,
"signal/frontier_coverage_5/group_std_mean": 0.0813647210597992,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0069804366677999495,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0008096729870885611,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.20503715574741363,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33353949189186094,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.08333333432674409,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.17352269291877748,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.020503715239465238,
"step": 50
},
{
"epoch": 0.11999850001874976,
"eval_calibration/aurc": 0.2739726821231387,
"eval_calibration/batch_distribution_entropy": 0.6931429942513239,
"eval_calibration/buffer_distribution_entropy": 0.44627538022254415,
"eval_calibration/confidence_entropy": 0.49130108893650654,
"eval_calibration/coverage@0%": 0.0871415770609319,
"eval_calibration/coverage@1%": 0.0871415770609319,
"eval_calibration/coverage@10%": 0.0871415770609319,
"eval_calibration/coverage@15%": 0.22946908602150537,
"eval_calibration/coverage@20%": 0.4065524193548387,
"eval_calibration/coverage@25%": 0.5317204301075269,
"eval_calibration/coverage@30%": 0.6578853046594982,
"eval_calibration/coverage@5%": 0.0871415770609319,
"eval_calibration/ece": 0.18241153673835128,
"eval_calibration/mean_confidence": 0.7556062051971327,
"eval_completions/clipped_ratio": 0.013020833333333351,
"eval_completions/max_length": 3520.0,
"eval_completions/max_terminated_length": 3520.0,
"eval_completions/mean_length": 614.679189046224,
"eval_completions/mean_terminated_length": 622.7157389322916,
"eval_completions/min_length": 46.666666666666664,
"eval_completions/min_terminated_length": 193.33333333333334,
"eval_loss": 0.0,
"eval_num_tokens": 87226061.0,
"eval_reward": 0.8327561815579733,
"eval_reward_std": 0.282521386941274,
"eval_rewards/accuracy_reward": 0.5876736144224802,
"eval_rewards/brier_reward": 0.7180667718251547,
"eval_rewards/confidence_uniqueness_reward": 0.7620020310084025,
"eval_rewards/format_reward": 0.9791666666666666,
"eval_rewards/frontier_coverage_0": -0.0075345072740068035,
"eval_rewards/frontier_coverage_1": -0.0075345072740068035,
"eval_rewards/frontier_coverage_10": -0.0075345072740068035,
"eval_rewards/frontier_coverage_15": -0.0075345072740068035,
"eval_rewards/frontier_coverage_20": -0.0075345072740068035,
"eval_rewards/frontier_coverage_25": -0.0075345072740068035,
"eval_rewards/frontier_coverage_5": -0.0075345072740068035,
"eval_rewards/frontier_entropy_batch_reward": -0.9791666666666666,
"eval_runtime": 202.8998,
"eval_samples_per_second": 4.929,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4644639740387599,
"eval_signal/accuracy_reward/group_std_mean": 0.4887815515200297,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.830911248922348,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23223198701937994,
"eval_signal/advantage_abs_mean": 0.9150658249855042,
"eval_signal/advantage_pre_scale_abs_mean": 0.25886716693639755,
"eval_signal/advantage_pre_scale_std": 0.28026849031448364,
"eval_signal/advantage_std": 0.986470510562261,
"eval_signal/brier_reward/centered_abs_mean": 0.24225710580746332,
"eval_signal/brier_reward/group_std_mean": 0.2897177239259084,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08656309793392818,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.024225711201628048,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.1251125161846479,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1669869671265284,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.044979797676205635,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012511251804729303,
"eval_signal/format_reward/centered_abs_mean": 0.039713542287548385,
"eval_signal/format_reward/group_std_mean": 0.09991467806200187,
"eval_signal/format_reward/group_zero_std_frac": 0.5000000099341074,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.07029524445533752,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.019856771143774193,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.08148297543327014,
"eval_signal/frontier_coverage_0/group_std_mean": 0.1163974292576313,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004172447098729511,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0011652065246986847,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.08148297543327014,
"eval_signal/frontier_coverage_1/group_std_mean": 0.1163974292576313,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004172447098729511,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0011652065246986847,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.08148297543327014,
"eval_signal/frontier_coverage_10/group_std_mean": 0.1163974292576313,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004172447098729511,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011652065246986847,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.08148297543327014,
"eval_signal/frontier_coverage_15/group_std_mean": 0.1163974292576313,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004172447098729511,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011652065246986847,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.08148297543327014,
"eval_signal/frontier_coverage_20/group_std_mean": 0.1163974292576313,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004172447098729511,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011652065246986847,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.08148297543327014,
"eval_signal/frontier_coverage_25/group_std_mean": 0.1163974292576313,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004172447098729511,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011652065246986847,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.08148297543327014,
"eval_signal/frontier_coverage_5/group_std_mean": 0.1163974292576313,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004172447098729511,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0011652065246986847,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.039713542287548385,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.09991467806200187,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.5000000099341074,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014059049698213736,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.003971354415019353,
"eval_steps_per_second": 0.03,
"step": 50
},
{
"epoch": 0.11999850001874976,
"step": 50,
"train_probe_calibration/aurc": 0.36517490575956213,
"train_probe_calibration/batch_distribution_entropy": 0.6947527552817748,
"train_probe_calibration/buffer_distribution_entropy": 0.4524707357411102,
"train_probe_calibration/confidence_entropy": 0.4870250294794456,
"train_probe_calibration/coverage@0%": 0.03245967741935484,
"train_probe_calibration/coverage@1%": 0.03245967741935484,
"train_probe_calibration/coverage@10%": 0.03245967741935484,
"train_probe_calibration/coverage@15%": 0.03245967741935484,
"train_probe_calibration/coverage@20%": 0.05396505376344086,
"train_probe_calibration/coverage@25%": 0.28139560931899643,
"train_probe_calibration/coverage@30%": 0.3445900537634408,
"train_probe_calibration/coverage@5%": 0.03245967741935484,
"train_probe_calibration/ece": 0.20738995295698923,
"train_probe_calibration/mean_confidence": 0.7651828517025089,
"train_probe_completions/clipped_ratio": 0.013715277777777776,
"train_probe_completions/max_length": 2729.3333333333335,
"train_probe_completions/max_terminated_length": 2729.3333333333335,
"train_probe_completions/mean_length": 602.330576578776,
"train_probe_completions/mean_terminated_length": 610.7609659830729,
"train_probe_completions/min_length": 34.833333333333336,
"train_probe_completions/min_terminated_length": 196.16666666666666,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 87226061.0,
"train_probe_reward": 0.854498436053594,
"train_probe_reward_std": 0.2775838126738866,
"train_probe_rewards/accuracy_reward": 0.625,
"train_probe_rewards/brier_reward": 0.7405163049697876,
"train_probe_rewards/confidence_uniqueness_reward": 0.7529355386892954,
"train_probe_rewards/format_reward": 0.984375,
"train_probe_rewards/frontier_coverage_0": -0.010956409852951765,
"train_probe_rewards/frontier_coverage_1": -0.010956409852951765,
"train_probe_rewards/frontier_coverage_10": -0.010956409852951765,
"train_probe_rewards/frontier_coverage_15": -0.010956409852951765,
"train_probe_rewards/frontier_coverage_20": -0.010956409852951765,
"train_probe_rewards/frontier_coverage_25": -0.010956409852951765,
"train_probe_rewards/frontier_coverage_5": -0.010956409852951765,
"train_probe_rewards/frontier_entropy_batch_reward": -0.984375,
"train_probe_runtime": 200.0763,
"train_probe_samples_per_second": 4.998,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4587673594554265,
"train_probe_signal/accuracy_reward/group_std_mean": 0.48622022569179535,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8326980173587799,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.22938367972771326,
"train_probe_signal/advantage_abs_mean": 0.9171395301818848,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.2545117884874344,
"train_probe_signal/advantage_pre_scale_std": 0.27495451271533966,
"train_probe_signal/advantage_std": 0.9864658315976461,
"train_probe_signal/brier_reward/centered_abs_mean": 0.23686287055412927,
"train_probe_signal/brier_reward/group_std_mean": 0.28272825479507446,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08592941612005234,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.023686287303765614,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.12726762145757675,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.1612810716032982,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04609783055881659,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012726762021581331,
"train_probe_signal/format_reward/centered_abs_mean": 0.029947916821887095,
"train_probe_signal/format_reward/group_std_mean": 0.07942011921356122,
"train_probe_signal/format_reward/group_zero_std_frac": 0.5833333482344946,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.053789831697940826,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.014973958410943547,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.0769535352786382,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.10913310199975967,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00399844697676599,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0011004355813687046,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0769535352786382,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.10913310199975967,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00399844697676599,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0011004355813687046,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0769535352786382,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.10913310199975967,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00399844697676599,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011004355813687046,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0769535352786382,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.10913310199975967,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00399844697676599,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011004355813687046,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0769535352786382,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.10913310199975967,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00399844697676599,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011004355813687046,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0769535352786382,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.10913310199975967,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00399844697676599,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011004355813687046,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0769535352786382,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.10913310199975967,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00399844697676599,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0011004355813687046,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.029947916821887095,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.07942011921356122,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.5833333482344946,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01075796662674596,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0029947917792014778,
"train_probe_steps_per_second": 0.03
},
{
"calibration/aurc": 0.38530740575080935,
"calibration/batch_distribution_entropy": 0.7603393169444657,
"calibration/buffer_distribution_entropy": 0.47986178564628323,
"calibration/confidence_entropy": 0.5200688029931547,
"calibration/coverage@0%": 0.004780705518839584,
"calibration/coverage@1%": 0.004780705518839584,
"calibration/coverage@10%": 0.008445626984808172,
"calibration/coverage@15%": 0.008445626984808172,
"calibration/coverage@20%": 0.019963951592138013,
"calibration/coverage@25%": 0.14104588392992415,
"calibration/coverage@30%": 0.4016393442622951,
"calibration/coverage@5%": 0.004780705518839584,
"calibration/ece": 0.17737637549373997,
"calibration/mean_confidence": 0.7278365423980295,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012065972222222231,
"completions/max_length": 4011.6,
"completions/max_terminated_length": 4011.6,
"completions/mean_length": 613.4942016601562,
"completions/mean_terminated_length": 621.0005493164062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 143.0,
"epoch": 0.13199835002062474,
"grad_norm": 0.010351327247917652,
"learning_rate": 1.3221153846153848e-06,
"loss": -0.0189,
"num_tokens": 97374090.0,
"reward": 0.8857142567634583,
"reward_std": 0.18409750163555144,
"rewards/accuracy_reward": 0.6035590291023254,
"rewards/brier_reward": 0.7294302701950073,
"rewards/confidence_uniqueness_reward": 0.8790017008781433,
"rewards/format_reward": 0.9854166746139527,
"rewards/frontier_coverage_0": -0.01264396500773728,
"rewards/frontier_coverage_1": -0.01264396500773728,
"rewards/frontier_coverage_10": -0.01264396500773728,
"rewards/frontier_coverage_15": -0.01264396500773728,
"rewards/frontier_coverage_20": -0.01264396500773728,
"rewards/frontier_coverage_25": -0.01264396500773728,
"rewards/frontier_coverage_5": -0.01264396500773728,
"rewards/frontier_entropy_batch_reward": -0.6835112333297729,
"signal/accuracy_reward/centered_abs_mean": 0.22823893129825593,
"signal/accuracy_reward/group_std_mean": 0.2900137364864349,
"signal/accuracy_reward/group_zero_std_frac": 0.2194444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9254291892051697,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11411946564912796,
"signal/advantage_abs_mean": 0.7338205933570862,
"signal/advantage_pre_scale_abs_mean": 0.1410813570022583,
"signal/advantage_pre_scale_std": 0.20234240591526031,
"signal/advantage_std": 0.983665120601654,
"signal/brier_reward/centered_abs_mean": 0.15796597599983214,
"signal/brier_reward/group_std_mean": 0.20258763134479524,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.12823985517024994,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01579659804701805,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07788840532302857,
"signal/confidence_uniqueness_reward/group_std_mean": 0.10405172556638717,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06344843953847885,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007788840495049953,
"signal/format_reward/centered_abs_mean": 0.02560763880610466,
"signal/format_reward/group_std_mean": 0.04888112768530846,
"signal/format_reward/group_zero_std_frac": 0.800000011920929,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1036648079752922,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01280381940305233,
"signal/frontier_coverage_0/centered_abs_mean": 0.08440108299255371,
"signal/frontier_coverage_0/group_std_mean": 0.1161453977227211,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.009755328483879565,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012069354532286524,
"signal/frontier_coverage_1/centered_abs_mean": 0.08440108299255371,
"signal/frontier_coverage_1/group_std_mean": 0.1161453977227211,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.009755328483879565,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012069354532286524,
"signal/frontier_coverage_10/centered_abs_mean": 0.08440108299255371,
"signal/frontier_coverage_10/group_std_mean": 0.1161453977227211,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.009755328483879565,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012069354532286524,
"signal/frontier_coverage_15/centered_abs_mean": 0.08440108299255371,
"signal/frontier_coverage_15/group_std_mean": 0.1161453977227211,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.009755328483879565,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012069354532286524,
"signal/frontier_coverage_20/centered_abs_mean": 0.08440108299255371,
"signal/frontier_coverage_20/group_std_mean": 0.1161453977227211,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.009755328483879565,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012069354532286524,
"signal/frontier_coverage_25/centered_abs_mean": 0.08440108299255371,
"signal/frontier_coverage_25/group_std_mean": 0.1161453977227211,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.009755328483879565,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012069354532286524,
"signal/frontier_coverage_5/centered_abs_mean": 0.08440108299255371,
"signal/frontier_coverage_5/group_std_mean": 0.1161453977227211,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.009755328483879565,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012069354532286524,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3892269194126129,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.46654823422431946,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.31475735306739805,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03892269358038902,
"step": 55
},
{
"calibration/aurc": 0.4003518482751858,
"calibration/batch_distribution_entropy": 0.911201528778801,
"calibration/buffer_distribution_entropy": 0.5497689485268447,
"calibration/confidence_entropy": 0.5590389248971067,
"calibration/coverage@0%": 0.002644209126607614,
"calibration/coverage@1%": 0.002644209126607614,
"calibration/coverage@10%": 0.011132272787085068,
"calibration/coverage@15%": 0.15277683512130258,
"calibration/coverage@20%": 0.20368842012441699,
"calibration/coverage@25%": 0.20368842012441699,
"calibration/coverage@30%": 0.2042161246099051,
"calibration/coverage@5%": 0.002644209126607614,
"calibration/ece": 0.19523114615371426,
"calibration/mean_confidence": 0.589335139080694,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015104166666666674,
"completions/max_length": 3861.6,
"completions/max_terminated_length": 3861.6,
"completions/mean_length": 621.9077392578125,
"completions/mean_terminated_length": 631.4978149414062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 162.4,
"epoch": 0.14399820002249972,
"grad_norm": 0.0028857546858489513,
"learning_rate": 1.4423076923076922e-06,
"loss": -0.0281,
"num_tokens": 107635043.0,
"reward": 0.909643542766571,
"reward_std": 0.18056119680404664,
"rewards/accuracy_reward": 0.5854166507720947,
"rewards/brier_reward": 0.7150079846382141,
"rewards/confidence_uniqueness_reward": 0.9329192876815796,
"rewards/format_reward": 0.9831597089767456,
"rewards/frontier_coverage_0": -0.022861182875931262,
"rewards/frontier_coverage_1": -0.022861182875931262,
"rewards/frontier_coverage_10": -0.022861182875931262,
"rewards/frontier_coverage_15": -0.022861182875931262,
"rewards/frontier_coverage_20": -0.022861182875931262,
"rewards/frontier_coverage_25": -0.022861182875931262,
"rewards/frontier_coverage_5": -0.022861182875931262,
"rewards/frontier_entropy_batch_reward": -0.37148996591567995,
"signal/accuracy_reward/centered_abs_mean": 0.22856987714767457,
"signal/accuracy_reward/group_std_mean": 0.2946248114109039,
"signal/accuracy_reward/group_zero_std_frac": 0.1972222238779068,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8809527635574341,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11428493857383729,
"signal/advantage_abs_mean": 0.7478281497955322,
"signal/advantage_pre_scale_abs_mean": 0.13646075129508972,
"signal/advantage_pre_scale_std": 0.19674695134162903,
"signal/advantage_std": 0.9837003707885742,
"signal/brier_reward/centered_abs_mean": 0.18786855041980743,
"signal/brier_reward/group_std_mean": 0.2366286039352417,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.14518831074237823,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.018786855787038804,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.042062591016292575,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07180588617920876,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03262592852115631,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0042062591761350635,
"signal/format_reward/centered_abs_mean": 0.02958984412252903,
"signal/format_reward/group_std_mean": 0.057399775832891464,
"signal/format_reward/group_zero_std_frac": 0.7611111164093017,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.11437597423791886,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.014794922061264515,
"signal/frontier_coverage_0/centered_abs_mean": 0.1847160905599594,
"signal/frontier_coverage_0/group_std_mean": 0.24841379821300508,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.020291910134255887,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002641439996659756,
"signal/frontier_coverage_1/centered_abs_mean": 0.1847160905599594,
"signal/frontier_coverage_1/group_std_mean": 0.24841379821300508,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.020291910134255887,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002641439996659756,
"signal/frontier_coverage_10/centered_abs_mean": 0.1847160905599594,
"signal/frontier_coverage_10/group_std_mean": 0.24841379821300508,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.020291910134255887,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002641439996659756,
"signal/frontier_coverage_15/centered_abs_mean": 0.1847160905599594,
"signal/frontier_coverage_15/group_std_mean": 0.24841379821300508,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.020291910134255887,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002641439996659756,
"signal/frontier_coverage_20/centered_abs_mean": 0.1847160905599594,
"signal/frontier_coverage_20/group_std_mean": 0.24841379821300508,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020291910134255887,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002641439996659756,
"signal/frontier_coverage_25/centered_abs_mean": 0.1847160905599594,
"signal/frontier_coverage_25/group_std_mean": 0.24841379821300508,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.020291910134255887,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002641439996659756,
"signal/frontier_coverage_5/centered_abs_mean": 0.1847160905599594,
"signal/frontier_coverage_5/group_std_mean": 0.24841379821300508,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.020291910134255887,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002641439996659756,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.39274935722351073,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.45597169995307923,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.30623041093349457,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03927493616938591,
"step": 60
},
{
"calibration/aurc": 0.3440475068274386,
"calibration/batch_distribution_entropy": 0.9846613687513786,
"calibration/buffer_distribution_entropy": 0.631180763920832,
"calibration/confidence_entropy": 0.5175700592510253,
"calibration/coverage@0%": 0.005349028881419754,
"calibration/coverage@1%": 0.005349028881419754,
"calibration/coverage@10%": 0.007517050561636556,
"calibration/coverage@15%": 0.007517050561636556,
"calibration/coverage@20%": 0.037326611292945386,
"calibration/coverage@25%": 0.35507176012497793,
"calibration/coverage@30%": 0.5909584488716831,
"calibration/coverage@5%": 0.005349028881419754,
"calibration/ece": 0.22226896945146724,
"calibration/mean_confidence": 0.539453303271356,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01206597222222221,
"completions/max_length": 3910.0,
"completions/max_terminated_length": 3910.0,
"completions/mean_length": 604.5211791992188,
"completions/mean_terminated_length": 611.927783203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 157.8,
"epoch": 0.1559980500243747,
"grad_norm": 0.0038041872903704643,
"learning_rate": 1.5625e-06,
"loss": -0.0194,
"num_tokens": 117693175.0,
"reward": 0.9383891940116882,
"reward_std": 0.16168527901172638,
"rewards/accuracy_reward": 0.6201388955116272,
"rewards/brier_reward": 0.689646327495575,
"rewards/confidence_uniqueness_reward": 0.9409290075302124,
"rewards/format_reward": 0.9869791746139527,
"rewards/frontier_coverage_0": -0.06118553690612316,
"rewards/frontier_coverage_1": -0.06118553690612316,
"rewards/frontier_coverage_10": -0.06118553690612316,
"rewards/frontier_coverage_15": -0.06118553690612316,
"rewards/frontier_coverage_20": -0.06118553690612316,
"rewards/frontier_coverage_25": -0.06118553690612316,
"rewards/frontier_coverage_5": -0.06118553690612316,
"rewards/frontier_entropy_batch_reward": -0.22102709710597992,
"signal/accuracy_reward/centered_abs_mean": 0.20631510019302368,
"signal/accuracy_reward/group_std_mean": 0.2710012078285217,
"signal/accuracy_reward/group_zero_std_frac": 0.2416666716337204,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8475908994674682,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10315755009651184,
"signal/advantage_abs_mean": 0.751226258277893,
"signal/advantage_pre_scale_abs_mean": 0.12262100130319595,
"signal/advantage_pre_scale_std": 0.1762317180633545,
"signal/advantage_std": 0.9836538314819336,
"signal/brier_reward/centered_abs_mean": 0.22580573260784148,
"signal/brier_reward/group_std_mean": 0.2729759395122528,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1860074907541275,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.022580574080348016,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03142448402941227,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05232158675789833,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02579816021025181,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031424484681338073,
"signal/format_reward/centered_abs_mean": 0.021940104477107526,
"signal/format_reward/group_std_mean": 0.04096246287226677,
"signal/format_reward/group_zero_std_frac": 0.8333333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08990018367767334,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010970052238553763,
"signal/frontier_coverage_0/centered_abs_mean": 0.2512904554605484,
"signal/frontier_coverage_0/group_std_mean": 0.32389657497406005,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.029607247561216354,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003593453438952565,
"signal/frontier_coverage_1/centered_abs_mean": 0.2512904554605484,
"signal/frontier_coverage_1/group_std_mean": 0.32389657497406005,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.029607247561216354,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003593453438952565,
"signal/frontier_coverage_10/centered_abs_mean": 0.2512904554605484,
"signal/frontier_coverage_10/group_std_mean": 0.32389657497406005,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.029607247561216354,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003593453438952565,
"signal/frontier_coverage_15/centered_abs_mean": 0.2512904554605484,
"signal/frontier_coverage_15/group_std_mean": 0.32389657497406005,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.029607247561216354,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003593453438952565,
"signal/frontier_coverage_20/centered_abs_mean": 0.2512904554605484,
"signal/frontier_coverage_20/group_std_mean": 0.32389657497406005,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.029607247561216354,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003593453438952565,
"signal/frontier_coverage_25/centered_abs_mean": 0.2512904554605484,
"signal/frontier_coverage_25/group_std_mean": 0.32389657497406005,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029607247561216354,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003593453438952565,
"signal/frontier_coverage_5/centered_abs_mean": 0.2512904554605484,
"signal/frontier_coverage_5/group_std_mean": 0.32389657497406005,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.029607247561216354,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003593453438952565,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30838763117790224,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38390254974365234,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.25373234748840334,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03083876222372055,
"step": 65
},
{
"calibration/aurc": 0.37614875200348924,
"calibration/batch_distribution_entropy": 0.9437000522925292,
"calibration/buffer_distribution_entropy": 0.6810074171481879,
"calibration/confidence_entropy": 0.48104768667204156,
"calibration/coverage@0%": 0.005291005291005291,
"calibration/coverage@1%": 0.005291005291005291,
"calibration/coverage@10%": 0.006349206349206349,
"calibration/coverage@15%": 0.014285714285714285,
"calibration/coverage@20%": 0.03965483808415745,
"calibration/coverage@25%": 0.08833208676140612,
"calibration/coverage@30%": 0.14934841248361422,
"calibration/coverage@5%": 0.005291005291005291,
"calibration/ece": 0.20092050987122345,
"calibration/mean_confidence": 0.6378651698286639,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011458333333333326,
"completions/max_length": 3443.0,
"completions/max_terminated_length": 3443.0,
"completions/mean_length": 591.0729248046875,
"completions/mean_terminated_length": 597.8992553710938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 137.6,
"epoch": 0.16799790002624967,
"grad_norm": 0.0031007726211100817,
"learning_rate": 1.682692307692308e-06,
"loss": -0.0277,
"num_tokens": 127580479.0,
"reward": 0.9297984480857849,
"reward_std": 0.162366783618927,
"rewards/accuracy_reward": 0.6066840410232544,
"rewards/brier_reward": 0.7150557637214661,
"rewards/confidence_uniqueness_reward": 0.9357271790504456,
"rewards/format_reward": 0.9878472089767456,
"rewards/frontier_coverage_0": -0.018339458177797495,
"rewards/frontier_coverage_1": -0.018339458177797495,
"rewards/frontier_coverage_10": -0.018339458177797495,
"rewards/frontier_coverage_15": -0.018339458177797495,
"rewards/frontier_coverage_20": -0.018339458177797495,
"rewards/frontier_coverage_25": -0.018339458177797495,
"rewards/frontier_coverage_5": -0.018339458177797495,
"rewards/frontier_entropy_batch_reward": -0.30709723234176634,
"signal/accuracy_reward/centered_abs_mean": 0.19468858242034912,
"signal/accuracy_reward/group_std_mean": 0.2581492453813553,
"signal/accuracy_reward/group_zero_std_frac": 0.2611111104488373,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9012615561485291,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09734429121017456,
"signal/advantage_abs_mean": 0.737975811958313,
"signal/advantage_pre_scale_abs_mean": 0.12125055640935897,
"signal/advantage_pre_scale_std": 0.1813569724559784,
"signal/advantage_std": 0.983547055721283,
"signal/brier_reward/centered_abs_mean": 0.2159047991037369,
"signal/brier_reward/group_std_mean": 0.26480709910392763,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2007334202528,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02159047983586788,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.035509735345840454,
"signal/confidence_uniqueness_reward/group_std_mean": 0.056942480802536014,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03271013423800469,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003550973394885659,
"signal/format_reward/centered_abs_mean": 0.021148003451526166,
"signal/format_reward/group_std_mean": 0.03970330134034157,
"signal/format_reward/group_zero_std_frac": 0.8388888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09669478535652161,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010574001725763083,
"signal/frontier_coverage_0/centered_abs_mean": 0.1944323420524597,
"signal/frontier_coverage_0/group_std_mean": 0.26410470604896547,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.025967657193541528,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027803825214505196,
"signal/frontier_coverage_1/centered_abs_mean": 0.1944323420524597,
"signal/frontier_coverage_1/group_std_mean": 0.26410470604896547,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.025967657193541528,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027803825214505196,
"signal/frontier_coverage_10/centered_abs_mean": 0.1944323420524597,
"signal/frontier_coverage_10/group_std_mean": 0.26410470604896547,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.025967657193541528,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027803825214505196,
"signal/frontier_coverage_15/centered_abs_mean": 0.1944323420524597,
"signal/frontier_coverage_15/group_std_mean": 0.26410470604896547,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.025967657193541528,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027803825214505196,
"signal/frontier_coverage_20/centered_abs_mean": 0.1944323420524597,
"signal/frontier_coverage_20/group_std_mean": 0.26410470604896547,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025967657193541528,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027803825214505196,
"signal/frontier_coverage_25/centered_abs_mean": 0.1944323420524597,
"signal/frontier_coverage_25/group_std_mean": 0.26410470604896547,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.025967657193541528,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027803825214505196,
"signal/frontier_coverage_5/centered_abs_mean": 0.1944323420524597,
"signal/frontier_coverage_5/group_std_mean": 0.26410470604896547,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.025967657193541528,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027803825214505196,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.37358362078666685,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4412872850894928,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.34820364117622377,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03735836371779442,
"step": 70
},
{
"calibration/aurc": 0.29475290186298847,
"calibration/batch_distribution_entropy": 0.9601365965846205,
"calibration/buffer_distribution_entropy": 0.7126132478286568,
"calibration/confidence_entropy": 0.5229362687640748,
"calibration/coverage@0%": 0.0036750668074058772,
"calibration/coverage@1%": 0.0036750668074058772,
"calibration/coverage@10%": 0.028272827664786983,
"calibration/coverage@15%": 0.15863521881757633,
"calibration/coverage@20%": 0.27442664596711597,
"calibration/coverage@25%": 0.5169448769362663,
"calibration/coverage@30%": 0.6216779623354258,
"calibration/coverage@5%": 0.0036750668074058772,
"calibration/ece": 0.20640462574306961,
"calibration/mean_confidence": 0.5862351896256457,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010763888888888861,
"completions/max_length": 3745.6,
"completions/max_terminated_length": 3745.6,
"completions/mean_length": 597.4527099609375,
"completions/mean_terminated_length": 604.02197265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 166.4,
"epoch": 0.17999775002812465,
"grad_norm": 0.0030304889660328627,
"learning_rate": 1.8028846153846156e-06,
"loss": -0.0221,
"num_tokens": 137528030.0,
"reward": 0.9619996666908264,
"reward_std": 0.15815145373344422,
"rewards/accuracy_reward": 0.6619791626930237,
"rewards/brier_reward": 0.7344695806503296,
"rewards/confidence_uniqueness_reward": 0.9398493289947509,
"rewards/format_reward": 0.9886284828186035,
"rewards/frontier_coverage_0": -0.04306319504976273,
"rewards/frontier_coverage_1": -0.04306319504976273,
"rewards/frontier_coverage_10": -0.04306319504976273,
"rewards/frontier_coverage_15": -0.04306319504976273,
"rewards/frontier_coverage_20": -0.04306319504976273,
"rewards/frontier_coverage_25": -0.04306319504976273,
"rewards/frontier_coverage_5": -0.04306319504976273,
"rewards/frontier_entropy_batch_reward": -0.26425428688526154,
"signal/accuracy_reward/centered_abs_mean": 0.2045247346162796,
"signal/accuracy_reward/group_std_mean": 0.2658460080623627,
"signal/accuracy_reward/group_zero_std_frac": 0.2611111104488373,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9393451929092407,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1022623673081398,
"signal/advantage_abs_mean": 0.7476536273956299,
"signal/advantage_pre_scale_abs_mean": 0.11897708922624588,
"signal/advantage_pre_scale_std": 0.17594174146652222,
"signal/advantage_std": 0.9835451006889343,
"signal/brier_reward/centered_abs_mean": 0.20120880603790284,
"signal/brier_reward/group_std_mean": 0.2491263210773468,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18733288943767548,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.020120881125330926,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031463361158967015,
"signal/confidence_uniqueness_reward/group_std_mean": 0.055449703335762025,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02855253517627716,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003146336181089282,
"signal/format_reward/centered_abs_mean": 0.02031792579218745,
"signal/format_reward/group_std_mean": 0.042368917539715764,
"signal/format_reward/group_zero_std_frac": 0.8111111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09047293290495872,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010158962896093726,
"signal/frontier_coverage_0/centered_abs_mean": 0.21644563972949982,
"signal/frontier_coverage_0/group_std_mean": 0.28655238151550294,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.028951628133654594,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030951724853366613,
"signal/frontier_coverage_1/centered_abs_mean": 0.21644563972949982,
"signal/frontier_coverage_1/group_std_mean": 0.28655238151550294,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.028951628133654594,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030951724853366613,
"signal/frontier_coverage_10/centered_abs_mean": 0.21644563972949982,
"signal/frontier_coverage_10/group_std_mean": 0.28655238151550294,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.028951628133654594,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030951724853366613,
"signal/frontier_coverage_15/centered_abs_mean": 0.21644563972949982,
"signal/frontier_coverage_15/group_std_mean": 0.28655238151550294,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.028951628133654594,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030951724853366613,
"signal/frontier_coverage_20/centered_abs_mean": 0.21644563972949982,
"signal/frontier_coverage_20/group_std_mean": 0.28655238151550294,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.028951628133654594,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030951724853366613,
"signal/frontier_coverage_25/centered_abs_mean": 0.21644563972949982,
"signal/frontier_coverage_25/group_std_mean": 0.28655238151550294,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028951628133654594,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030951724853366613,
"signal/frontier_coverage_5/centered_abs_mean": 0.21644563972949982,
"signal/frontier_coverage_5/group_std_mean": 0.28655238151550294,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.028951628133654594,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030951724853366613,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3392821609973907,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.412265545129776,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.31629372835159303,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033928216248750684,
"step": 75
},
{
"calibration/aurc": 0.25231172767774057,
"calibration/batch_distribution_entropy": 0.9542390552983331,
"calibration/buffer_distribution_entropy": 0.7438964308184917,
"calibration/confidence_entropy": 0.5081422037709752,
"calibration/coverage@0%": 0.014464726374207117,
"calibration/coverage@1%": 0.014464726374207117,
"calibration/coverage@10%": 0.11062715955993954,
"calibration/coverage@15%": 0.3353820244649297,
"calibration/coverage@20%": 0.47027582394426304,
"calibration/coverage@25%": 0.6061727802317394,
"calibration/coverage@30%": 0.6764227642276422,
"calibration/coverage@5%": 0.03178756101987641,
"calibration/ece": 0.17584206931549817,
"calibration/mean_confidence": 0.6132220308319034,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015190972222222232,
"completions/max_length": 3805.4,
"completions/max_terminated_length": 3805.4,
"completions/mean_length": 624.1913330078125,
"completions/mean_terminated_length": 633.8685424804687,
"completions/min_length": 0.0,
"completions/min_terminated_length": 147.2,
"epoch": 0.19199760002999963,
"grad_norm": 0.017383286729454994,
"learning_rate": 1.9230769230769234e-06,
"loss": -0.0345,
"num_tokens": 147771994.0,
"reward": 0.9449098944664002,
"reward_std": 0.1581345945596695,
"rewards/accuracy_reward": 0.6279513835906982,
"rewards/brier_reward": 0.7345611333847046,
"rewards/confidence_uniqueness_reward": 0.9351885437965393,
"rewards/format_reward": 0.9838541746139526,
"rewards/frontier_coverage_0": -0.017121723480522634,
"rewards/frontier_coverage_1": -0.017121723480522634,
"rewards/frontier_coverage_10": -0.017121723480522634,
"rewards/frontier_coverage_15": -0.017121723480522634,
"rewards/frontier_coverage_20": -0.017121723480522634,
"rewards/frontier_coverage_25": -0.017121723480522634,
"rewards/frontier_coverage_5": -0.017121723480522634,
"rewards/frontier_entropy_batch_reward": -0.2625397890806198,
"signal/accuracy_reward/centered_abs_mean": 0.19267578125,
"signal/accuracy_reward/group_std_mean": 0.2536807984113693,
"signal/accuracy_reward/group_zero_std_frac": 0.286111119389534,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9132581114768982,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.096337890625,
"signal/advantage_abs_mean": 0.7506688952445983,
"signal/advantage_pre_scale_abs_mean": 0.11927641779184342,
"signal/advantage_pre_scale_std": 0.17789039611816407,
"signal/advantage_std": 0.9835266828536987,
"signal/brier_reward/centered_abs_mean": 0.1951048344373703,
"signal/brier_reward/group_std_mean": 0.2429314970970154,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18561370372772218,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.019510484114289284,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03635510839521885,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05895108655095101,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03440159037709236,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003635510988533497,
"signal/format_reward/centered_abs_mean": 0.02534722201526165,
"signal/format_reward/group_std_mean": 0.04572202190756798,
"signal/format_reward/group_zero_std_frac": 0.8166666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1192347913980484,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012673611007630824,
"signal/frontier_coverage_0/centered_abs_mean": 0.20994635820388793,
"signal/frontier_coverage_0/group_std_mean": 0.2800168454647064,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02855151817202568,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030022328719496727,
"signal/frontier_coverage_1/centered_abs_mean": 0.20994635820388793,
"signal/frontier_coverage_1/group_std_mean": 0.2800168454647064,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02855151817202568,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030022328719496727,
"signal/frontier_coverage_10/centered_abs_mean": 0.20994635820388793,
"signal/frontier_coverage_10/group_std_mean": 0.2800168454647064,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02855151817202568,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030022328719496727,
"signal/frontier_coverage_15/centered_abs_mean": 0.20994635820388793,
"signal/frontier_coverage_15/group_std_mean": 0.2800168454647064,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02855151817202568,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030022328719496727,
"signal/frontier_coverage_20/centered_abs_mean": 0.20994635820388793,
"signal/frontier_coverage_20/group_std_mean": 0.2800168454647064,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02855151817202568,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030022328719496727,
"signal/frontier_coverage_25/centered_abs_mean": 0.20994635820388793,
"signal/frontier_coverage_25/group_std_mean": 0.2800168454647064,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02855151817202568,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030022328719496727,
"signal/frontier_coverage_5/centered_abs_mean": 0.20994635820388793,
"signal/frontier_coverage_5/group_std_mean": 0.2800168454647064,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02855151817202568,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030022328719496727,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3351067781448364,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40662684440612795,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3202349007129669,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03351067900657654,
"step": 80
},
{
"calibration/aurc": 0.23160324850331024,
"calibration/batch_distribution_entropy": 0.9763916446165247,
"calibration/buffer_distribution_entropy": 0.7679797116828759,
"calibration/confidence_entropy": 0.488824354782312,
"calibration/coverage@0%": 0.010098348032500199,
"calibration/coverage@1%": 0.010098348032500199,
"calibration/coverage@10%": 0.06652945039470493,
"calibration/coverage@15%": 0.2055611553976477,
"calibration/coverage@20%": 0.42201793128624443,
"calibration/coverage@25%": 0.6277265455491603,
"calibration/coverage@30%": 0.805941913022056,
"calibration/coverage@5%": 0.010098348032500199,
"calibration/ece": 0.1422296182290296,
"calibration/mean_confidence": 0.5608671261986663,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011458333333333348,
"completions/max_length": 3763.8,
"completions/max_terminated_length": 3763.8,
"completions/mean_length": 627.2774291992188,
"completions/mean_terminated_length": 634.5992553710937,
"completions/min_length": 0.0,
"completions/min_terminated_length": 183.0,
"epoch": 0.2039974500318746,
"grad_norm": 0.0028041868936270475,
"learning_rate": 2.043269230769231e-06,
"loss": -0.0302,
"num_tokens": 158085430.0,
"reward": 0.9636515617370606,
"reward_std": 0.15045951604843139,
"rewards/accuracy_reward": 0.6599826216697693,
"rewards/brier_reward": 0.7576205849647522,
"rewards/confidence_uniqueness_reward": 0.9372734069824219,
"rewards/format_reward": 0.9880208253860474,
"rewards/frontier_coverage_0": -0.010684687085449696,
"rewards/frontier_coverage_1": -0.010684687085449696,
"rewards/frontier_coverage_10": -0.010684687085449696,
"rewards/frontier_coverage_15": -0.010684687085449696,
"rewards/frontier_coverage_20": -0.010684687085449696,
"rewards/frontier_coverage_25": -0.010684687085449696,
"rewards/frontier_coverage_5": -0.010684687085449696,
"rewards/frontier_entropy_batch_reward": -0.28770090639591217,
"signal/accuracy_reward/centered_abs_mean": 0.19462348222732545,
"signal/accuracy_reward/group_std_mean": 0.25132531523704527,
"signal/accuracy_reward/group_zero_std_frac": 0.30555556416511537,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0194868922233582,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09731174111366273,
"signal/advantage_abs_mean": 0.7488582253456115,
"signal/advantage_pre_scale_abs_mean": 0.11269197762012481,
"signal/advantage_pre_scale_std": 0.17237231135368347,
"signal/advantage_std": 0.9834303975105285,
"signal/brier_reward/centered_abs_mean": 0.1861840546131134,
"signal/brier_reward/group_std_mean": 0.23627618551254273,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19565358459949495,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01861840598285198,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.033527684211730954,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05549739152193069,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03534887060523033,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033527683932334184,
"signal/format_reward/centered_abs_mean": 0.0208984375,
"signal/format_reward/group_std_mean": 0.04063469469547272,
"signal/format_reward/group_zero_std_frac": 0.8305555701255798,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.11059261560440063,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01044921875,
"signal/frontier_coverage_0/centered_abs_mean": 0.2140215367078781,
"signal/frontier_coverage_0/group_std_mean": 0.28595501780509947,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03218608051538467,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003060508007183671,
"signal/frontier_coverage_1/centered_abs_mean": 0.2140215367078781,
"signal/frontier_coverage_1/group_std_mean": 0.28595501780509947,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03218608051538467,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003060508007183671,
"signal/frontier_coverage_10/centered_abs_mean": 0.2140215367078781,
"signal/frontier_coverage_10/group_std_mean": 0.28595501780509947,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03218608051538467,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003060508007183671,
"signal/frontier_coverage_15/centered_abs_mean": 0.2140215367078781,
"signal/frontier_coverage_15/group_std_mean": 0.28595501780509947,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03218608051538467,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003060508007183671,
"signal/frontier_coverage_20/centered_abs_mean": 0.2140215367078781,
"signal/frontier_coverage_20/group_std_mean": 0.28595501780509947,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03218608051538467,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003060508007183671,
"signal/frontier_coverage_25/centered_abs_mean": 0.2140215367078781,
"signal/frontier_coverage_25/group_std_mean": 0.28595501780509947,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03218608051538467,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003060508007183671,
"signal/frontier_coverage_5/centered_abs_mean": 0.2140215367078781,
"signal/frontier_coverage_5/group_std_mean": 0.28595501780509947,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03218608051538467,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003060508007183671,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33995108008384706,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41062380075454713,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3568801164627075,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033995109423995015,
"step": 85
},
{
"calibration/aurc": 0.18159155784667086,
"calibration/batch_distribution_entropy": 0.9715807596045813,
"calibration/buffer_distribution_entropy": 0.7911530467023005,
"calibration/confidence_entropy": 0.5011403682671715,
"calibration/coverage@0%": 0.03679449026337346,
"calibration/coverage@1%": 0.03679449026337346,
"calibration/coverage@10%": 0.26407442436525363,
"calibration/coverage@15%": 0.40627110602921057,
"calibration/coverage@20%": 0.5997198808977326,
"calibration/coverage@25%": 0.7829376909111836,
"calibration/coverage@30%": 0.8591208692225413,
"calibration/coverage@5%": 0.05354158197526497,
"calibration/ece": 0.15576247778024233,
"calibration/mean_confidence": 0.5620462523617634,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013975694444444464,
"completions/max_length": 3526.8,
"completions/max_terminated_length": 3526.8,
"completions/mean_length": 621.3036499023438,
"completions/mean_terminated_length": 630.1340087890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 166.2,
"epoch": 0.2159973000337496,
"grad_norm": 0.003203911706805229,
"learning_rate": 2.1634615384615387e-06,
"loss": -0.0348,
"num_tokens": 168311520.0,
"reward": 0.9634303450584412,
"reward_std": 0.14751038253307341,
"rewards/accuracy_reward": 0.6569444417953492,
"rewards/brier_reward": 0.7645626664161682,
"rewards/confidence_uniqueness_reward": 0.9359369039535522,
"rewards/format_reward": 0.9853298544883728,
"rewards/frontier_coverage_0": -0.0024734840262681246,
"rewards/frontier_coverage_1": -0.0024734840262681246,
"rewards/frontier_coverage_10": -0.0024734840262681246,
"rewards/frontier_coverage_15": -0.0024734840262681246,
"rewards/frontier_coverage_20": -0.0024734840262681246,
"rewards/frontier_coverage_25": -0.0024734840262681246,
"rewards/frontier_coverage_5": -0.0024734840262681246,
"rewards/frontier_entropy_batch_reward": -0.2750917077064514,
"signal/accuracy_reward/centered_abs_mean": 0.18245442807674409,
"signal/accuracy_reward/group_std_mean": 0.2405119866132736,
"signal/accuracy_reward/group_zero_std_frac": 0.3138888835906982,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9671781301498413,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09122721403837204,
"signal/advantage_abs_mean": 0.7483711242675781,
"signal/advantage_pre_scale_abs_mean": 0.1103176698088646,
"signal/advantage_pre_scale_std": 0.17092794477939605,
"signal/advantage_std": 0.9834154367446899,
"signal/brier_reward/centered_abs_mean": 0.17637307345867156,
"signal/brier_reward/group_std_mean": 0.22479958832263947,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18773746192455293,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.017637307941913604,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.036441614478826524,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05890980660915375,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.038740897178649904,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003644161531701684,
"signal/format_reward/centered_abs_mean": 0.02457139752805233,
"signal/format_reward/group_std_mean": 0.04477007761597633,
"signal/format_reward/group_zero_std_frac": 0.8222222208976746,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1305002197623253,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012285698764026165,
"signal/frontier_coverage_0/centered_abs_mean": 0.20906727015972137,
"signal/frontier_coverage_0/group_std_mean": 0.27877257466316224,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03182048015296459,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002989662066102028,
"signal/frontier_coverage_1/centered_abs_mean": 0.20906727015972137,
"signal/frontier_coverage_1/group_std_mean": 0.27877257466316224,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03182048015296459,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002989662066102028,
"signal/frontier_coverage_10/centered_abs_mean": 0.20906727015972137,
"signal/frontier_coverage_10/group_std_mean": 0.27877257466316224,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03182048015296459,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002989662066102028,
"signal/frontier_coverage_15/centered_abs_mean": 0.20906727015972137,
"signal/frontier_coverage_15/group_std_mean": 0.27877257466316224,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03182048015296459,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002989662066102028,
"signal/frontier_coverage_20/centered_abs_mean": 0.20906727015972137,
"signal/frontier_coverage_20/group_std_mean": 0.27877257466316224,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03182048015296459,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002989662066102028,
"signal/frontier_coverage_25/centered_abs_mean": 0.20906727015972137,
"signal/frontier_coverage_25/group_std_mean": 0.27877257466316224,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03182048015296459,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002989662066102028,
"signal/frontier_coverage_5/centered_abs_mean": 0.20906727015972137,
"signal/frontier_coverage_5/group_std_mean": 0.27877257466316224,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03182048015296459,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002989662066102028,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.341404515504837,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4121077060699463,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3643798530101776,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034140453487634656,
"step": 90
},
{
"calibration/aurc": 0.2166042716481274,
"calibration/batch_distribution_entropy": 0.9833011135389865,
"calibration/buffer_distribution_entropy": 0.8075535376436263,
"calibration/confidence_entropy": 0.49801110093217843,
"calibration/coverage@0%": 0.016875791014919615,
"calibration/coverage@1%": 0.016875791014919615,
"calibration/coverage@10%": 0.3095501463732172,
"calibration/coverage@15%": 0.5008451526286807,
"calibration/coverage@20%": 0.605387048242923,
"calibration/coverage@25%": 0.6472735880583714,
"calibration/coverage@30%": 0.7001076678613718,
"calibration/coverage@5%": 0.11083371426044182,
"calibration/ece": 0.18083649378114502,
"calibration/mean_confidence": 0.5293936765297097,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010590277777777768,
"completions/max_length": 3390.8,
"completions/max_terminated_length": 3390.8,
"completions/mean_length": 650.1599975585938,
"completions/mean_terminated_length": 657.1581176757812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 168.6,
"epoch": 0.22799715003562457,
"grad_norm": 0.0029179048724472523,
"learning_rate": 2.283653846153846e-06,
"loss": -0.0354,
"num_tokens": 178893043.0,
"reward": 0.9682246446609497,
"reward_std": 0.13790646493434905,
"rewards/accuracy_reward": 0.6546007037162781,
"rewards/brier_reward": 0.760300588607788,
"rewards/confidence_uniqueness_reward": 0.9425395250320434,
"rewards/format_reward": 0.9892361044883728,
"rewards/frontier_coverage_0": -0.007322131656110287,
"rewards/frontier_coverage_1": -0.007322131656110287,
"rewards/frontier_coverage_10": -0.007322131656110287,
"rewards/frontier_coverage_15": -0.007322131656110287,
"rewards/frontier_coverage_20": -0.007322131656110287,
"rewards/frontier_coverage_25": -0.007322131656110287,
"rewards/frontier_coverage_5": -0.007322131656110287,
"rewards/frontier_entropy_batch_reward": -0.2324485570192337,
"signal/accuracy_reward/centered_abs_mean": 0.17507053017616273,
"signal/accuracy_reward/group_std_mean": 0.23211880326271056,
"signal/accuracy_reward/group_zero_std_frac": 0.33333333730697634,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9639934659004211,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08753526508808136,
"signal/advantage_abs_mean": 0.7448830127716064,
"signal/advantage_pre_scale_abs_mean": 0.10292523950338364,
"signal/advantage_pre_scale_std": 0.15798973739147187,
"signal/advantage_std": 0.9833694338798523,
"signal/brier_reward/centered_abs_mean": 0.17270311415195466,
"signal/brier_reward/group_std_mean": 0.21927067041397094,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1916445404291153,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.017270312085747717,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02888911999762058,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04897563457489014,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.032245057821273806,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028889121022075415,
"signal/format_reward/centered_abs_mean": 0.018511285074055195,
"signal/format_reward/group_std_mean": 0.03622478432953358,
"signal/format_reward/group_zero_std_frac": 0.8472222208976745,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10353666096925736,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009255642537027597,
"signal/frontier_coverage_0/centered_abs_mean": 0.2209547370672226,
"signal/frontier_coverage_0/group_std_mean": 0.2883128225803375,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03494723662734032,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003159652603790164,
"signal/frontier_coverage_1/centered_abs_mean": 0.2209547370672226,
"signal/frontier_coverage_1/group_std_mean": 0.2883128225803375,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03494723662734032,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003159652603790164,
"signal/frontier_coverage_10/centered_abs_mean": 0.2209547370672226,
"signal/frontier_coverage_10/group_std_mean": 0.2883128225803375,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03494723662734032,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003159652603790164,
"signal/frontier_coverage_15/centered_abs_mean": 0.2209547370672226,
"signal/frontier_coverage_15/group_std_mean": 0.2883128225803375,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03494723662734032,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003159652603790164,
"signal/frontier_coverage_20/centered_abs_mean": 0.2209547370672226,
"signal/frontier_coverage_20/group_std_mean": 0.2883128225803375,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03494723662734032,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003159652603790164,
"signal/frontier_coverage_25/centered_abs_mean": 0.2209547370672226,
"signal/frontier_coverage_25/group_std_mean": 0.2883128225803375,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03494723662734032,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003159652603790164,
"signal/frontier_coverage_5/centered_abs_mean": 0.2209547370672226,
"signal/frontier_coverage_5/group_std_mean": 0.2883128225803375,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03494723662734032,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003159652603790164,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3112953960895538,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38544243574142456,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3464753270149231,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031129539757966996,
"step": 95
},
{
"calibration/aurc": 0.16286997075318857,
"calibration/batch_distribution_entropy": 0.9568406111615333,
"calibration/buffer_distribution_entropy": 0.8212428711447959,
"calibration/confidence_entropy": 0.5075458735010276,
"calibration/coverage@0%": 0.04542281875223174,
"calibration/coverage@1%": 0.04542281875223174,
"calibration/coverage@10%": 0.22939056407012304,
"calibration/coverage@15%": 0.5071482595725405,
"calibration/coverage@20%": 0.7381393879589083,
"calibration/coverage@25%": 0.8506309096295332,
"calibration/coverage@30%": 0.9161175945633071,
"calibration/coverage@5%": 0.08715547092641457,
"calibration/ece": 0.13007832844366438,
"calibration/mean_confidence": 0.6023370758512752,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014843750000000022,
"completions/max_length": 3476.6,
"completions/max_terminated_length": 3476.6,
"completions/mean_length": 687.32822265625,
"completions/mean_terminated_length": 697.676513671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 190.6,
"epoch": 0.23999700003749952,
"grad_norm": 0.0030280128121376038,
"learning_rate": 2.403846153846154e-06,
"loss": -0.0394,
"num_tokens": 189910136.0,
"reward": 0.972452974319458,
"reward_std": 0.14463671147823334,
"rewards/accuracy_reward": 0.6689236044883728,
"rewards/brier_reward": 0.7945778131484985,
"rewards/confidence_uniqueness_reward": 0.934015154838562,
"rewards/format_reward": 0.9850694417953492,
"rewards/frontier_coverage_0": 0.020587368682026864,
"rewards/frontier_coverage_1": 0.020587368682026864,
"rewards/frontier_coverage_10": 0.020587368682026864,
"rewards/frontier_coverage_15": 0.020587368682026864,
"rewards/frontier_coverage_20": 0.020587368682026864,
"rewards/frontier_coverage_25": 0.020587368682026864,
"rewards/frontier_coverage_5": 0.020587368682026864,
"rewards/frontier_entropy_batch_reward": -0.29463626742362975,
"signal/accuracy_reward/centered_abs_mean": 0.17580294609069824,
"signal/accuracy_reward/group_std_mean": 0.22891995906829835,
"signal/accuracy_reward/group_zero_std_frac": 0.3611111223697662,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0090148210525514,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08790147304534912,
"signal/advantage_abs_mean": 0.7534759998321533,
"signal/advantage_pre_scale_abs_mean": 0.10780888944864273,
"signal/advantage_pre_scale_std": 0.16814400553703307,
"signal/advantage_std": 0.9833277583122253,
"signal/brier_reward/centered_abs_mean": 0.14416988790035248,
"signal/brier_reward/group_std_mean": 0.18674043118953704,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1662220239639282,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014416989497840404,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03587759211659432,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05903834477066994,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.041634421050548556,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003587759332731366,
"signal/format_reward/centered_abs_mean": 0.02444661483168602,
"signal/format_reward/group_std_mean": 0.04573269262909889,
"signal/format_reward/group_zero_std_frac": 0.8111111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1418396607041359,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01222330741584301,
"signal/frontier_coverage_0/centered_abs_mean": 0.17305268943309784,
"signal/frontier_coverage_0/group_std_mean": 0.22779336273670198,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.028698178008198738,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024746534414589404,
"signal/frontier_coverage_1/centered_abs_mean": 0.17305268943309784,
"signal/frontier_coverage_1/group_std_mean": 0.22779336273670198,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.028698178008198738,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024746534414589404,
"signal/frontier_coverage_10/centered_abs_mean": 0.17305268943309784,
"signal/frontier_coverage_10/group_std_mean": 0.22779336273670198,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.028698178008198738,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024746534414589404,
"signal/frontier_coverage_15/centered_abs_mean": 0.17305268943309784,
"signal/frontier_coverage_15/group_std_mean": 0.22779336273670198,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.028698178008198738,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024746534414589404,
"signal/frontier_coverage_20/centered_abs_mean": 0.17305268943309784,
"signal/frontier_coverage_20/group_std_mean": 0.22779336273670198,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.028698178008198738,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024746534414589404,
"signal/frontier_coverage_25/centered_abs_mean": 0.17305268943309784,
"signal/frontier_coverage_25/group_std_mean": 0.22779336273670198,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028698178008198738,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024746534414589404,
"signal/frontier_coverage_5/centered_abs_mean": 0.17305268943309784,
"signal/frontier_coverage_5/group_std_mean": 0.22779336273670198,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.028698178008198738,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024746534414589404,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3312057614326477,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40025997161865234,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3819632053375244,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03312057442963123,
"step": 100
},
{
"epoch": 0.23999700003749952,
"eval_calibration/aurc": 0.16686171218946902,
"eval_calibration/batch_distribution_entropy": 0.9064578842709808,
"eval_calibration/buffer_distribution_entropy": 0.8274689120482418,
"eval_calibration/confidence_entropy": 0.5010017723628131,
"eval_calibration/coverage@0%": 0.203125,
"eval_calibration/coverage@1%": 0.203125,
"eval_calibration/coverage@10%": 0.3645833333333333,
"eval_calibration/coverage@15%": 0.59375,
"eval_calibration/coverage@20%": 0.7239583333333334,
"eval_calibration/coverage@25%": 0.890625,
"eval_calibration/coverage@30%": 0.9427083333333334,
"eval_calibration/coverage@5%": 0.21354166666666666,
"eval_calibration/ece": 0.19091592880746577,
"eval_calibration/mean_confidence": 0.6266145921062726,
"eval_completions/clipped_ratio": 0.012152777777777771,
"eval_completions/max_length": 2540.8333333333335,
"eval_completions/max_terminated_length": 2540.8333333333335,
"eval_completions/mean_length": 683.2637329101562,
"eval_completions/mean_terminated_length": 691.6889953613281,
"eval_completions/min_length": 90.83333333333333,
"eval_completions/min_terminated_length": 244.5,
"eval_loss": 0.0,
"eval_num_tokens": 189910136.0,
"eval_reward": 0.8880608379840851,
"eval_reward_std": 0.2467256337404251,
"eval_rewards/accuracy_reward": 0.6467013955116272,
"eval_rewards/brier_reward": 0.7946632703145345,
"eval_rewards/confidence_uniqueness_reward": 0.8764048020044962,
"eval_rewards/format_reward": 0.9869791666666666,
"eval_rewards/frontier_coverage_0": 0.028088706807466224,
"eval_rewards/frontier_coverage_1": 0.028088706807466224,
"eval_rewards/frontier_coverage_10": 0.028088706807466224,
"eval_rewards/frontier_coverage_15": 0.028088706807466224,
"eval_rewards/frontier_coverage_20": 0.028088706807466224,
"eval_rewards/frontier_coverage_25": 0.028088706807466224,
"eval_rewards/frontier_coverage_5": 0.028088706807466224,
"eval_rewards/frontier_entropy_batch_reward": -0.9869791666666666,
"eval_runtime": 203.6327,
"eval_samples_per_second": 4.911,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4421115467945735,
"eval_signal/accuracy_reward/group_std_mean": 0.4767638146877289,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9132186075051626,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22105577339728674,
"eval_signal/advantage_abs_mean": 0.8897554278373718,
"eval_signal/advantage_pre_scale_abs_mean": 0.21958689639965692,
"eval_signal/advantage_pre_scale_std": 0.24578682581583658,
"eval_signal/advantage_std": 0.98641636967659,
"eval_signal/brier_reward/centered_abs_mean": 0.19057869911193848,
"eval_signal/brier_reward/group_std_mean": 0.2465061495701472,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0786722960571448,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019057870687295992,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.058801048124829926,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09244673078258832,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024099334763983887,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005880104377865791,
"eval_signal/format_reward/centered_abs_mean": 0.024576822761446238,
"eval_signal/format_reward/group_std_mean": 0.05818357535948356,
"eval_signal/format_reward/group_zero_std_frac": 0.7222222437461218,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.04943395716448625,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.012288411380723119,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.20176778982083002,
"eval_signal/frontier_coverage_0/group_std_mean": 0.2950784166653951,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.011925621423870325,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002885279362089932,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.20176778982083002,
"eval_signal/frontier_coverage_1/group_std_mean": 0.2950784166653951,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.011925621423870325,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002885279362089932,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.20176778982083002,
"eval_signal/frontier_coverage_10/group_std_mean": 0.2950784166653951,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.011925621423870325,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002885279362089932,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.20176778982083002,
"eval_signal/frontier_coverage_15/group_std_mean": 0.2950784166653951,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011925621423870325,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002885279362089932,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.20176778982083002,
"eval_signal/frontier_coverage_20/group_std_mean": 0.2950784166653951,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.011925621423870325,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002885279362089932,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.20176778982083002,
"eval_signal/frontier_coverage_25/group_std_mean": 0.2950784166653951,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.011925621423870325,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002885279362089932,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.20176778982083002,
"eval_signal/frontier_coverage_5/group_std_mean": 0.2950784166653951,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.011925621423870325,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002885279362089932,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.024576822761446238,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.05818357535948356,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.7222222437461218,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.00988679169677198,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.002457682373157392,
"eval_steps_per_second": 0.029,
"step": 100
},
{
"epoch": 0.23999700003749952,
"step": 100,
"train_probe_calibration/aurc": 0.1750085707513798,
"train_probe_calibration/batch_distribution_entropy": 0.8806643739281838,
"train_probe_calibration/buffer_distribution_entropy": 0.8284393122825452,
"train_probe_calibration/confidence_entropy": 0.47174162139538317,
"train_probe_calibration/coverage@0%": 0.198252688172043,
"train_probe_calibration/coverage@1%": 0.198252688172043,
"train_probe_calibration/coverage@10%": 0.3370295698924732,
"train_probe_calibration/coverage@15%": 0.5196572580645161,
"train_probe_calibration/coverage@20%": 0.7498319892473119,
"train_probe_calibration/coverage@25%": 0.8697916666666666,
"train_probe_calibration/coverage@30%": 0.9635416666666666,
"train_probe_calibration/coverage@5%": 0.198252688172043,
"train_probe_calibration/ece": 0.21608799111205979,
"train_probe_calibration/mean_confidence": 0.6526666320313913,
"train_probe_completions/clipped_ratio": 0.006944444444444457,
"train_probe_completions/max_length": 2153.0,
"train_probe_completions/max_terminated_length": 2153.0,
"train_probe_completions/mean_length": 688.56494140625,
"train_probe_completions/mean_terminated_length": 693.3657430013021,
"train_probe_completions/min_length": 90.16666666666667,
"train_probe_completions/min_terminated_length": 219.83333333333334,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 189910136.0,
"train_probe_reward": 0.9111224909623464,
"train_probe_reward_std": 0.23705051590998968,
"train_probe_rewards/accuracy_reward": 0.6892361044883728,
"train_probe_rewards/brier_reward": 0.8104513386885325,
"train_probe_rewards/confidence_uniqueness_reward": 0.8828965425491333,
"train_probe_rewards/format_reward": 0.9895833432674408,
"train_probe_rewards/frontier_coverage_0": 0.013349682888171325,
"train_probe_rewards/frontier_coverage_1": 0.013349682888171325,
"train_probe_rewards/frontier_coverage_10": 0.013349682888171325,
"train_probe_rewards/frontier_coverage_15": 0.013349682888171325,
"train_probe_rewards/frontier_coverage_20": 0.013349682888171325,
"train_probe_rewards/frontier_coverage_25": 0.013349682888171325,
"train_probe_rewards/frontier_coverage_5": 0.013349682888171325,
"train_probe_rewards/frontier_entropy_batch_reward": -0.9895833432674408,
"train_probe_runtime": 187.4827,
"train_probe_samples_per_second": 5.334,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4167751719554265,
"train_probe_signal/accuracy_reward/group_std_mean": 0.4626837372779846,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8901467820008596,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20838758597771326,
"train_probe_signal/advantage_abs_mean": 0.8610391517480215,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.2044555495182673,
"train_probe_signal/advantage_pre_scale_std": 0.23520567764838538,
"train_probe_signal/advantage_std": 0.9864020446936289,
"train_probe_signal/brier_reward/centered_abs_mean": 0.1798029119769732,
"train_probe_signal/brier_reward/group_std_mean": 0.23360620439052582,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07665263985594113,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.01798029150813818,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.053685990472634636,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0893898606300354,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02291724147895972,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005368599280094107,
"train_probe_signal/format_reward/centered_abs_mean": 0.020182291356225807,
"train_probe_signal/format_reward/group_std_mean": 0.05892556471129259,
"train_probe_signal/format_reward/group_zero_std_frac": 0.6666666815678278,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.041724019683897495,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.010091145678112904,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.1969028984506925,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.2944646179676056,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0120306263367335,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002815711389606198,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.1969028984506925,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.2944646179676056,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0120306263367335,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002815711389606198,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.1969028984506925,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.2944646179676056,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0120306263367335,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002815711389606198,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.1969028984506925,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.2944646179676056,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0120306263367335,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002815711389606198,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.1969028984506925,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.2944646179676056,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0120306263367335,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002815711389606198,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.1969028984506925,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.2944646179676056,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0120306263367335,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002815711389606198,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.1969028984506925,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.2944646179676056,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0120306263367335,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002815711389606198,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.020182291356225807,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.05892556471129259,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6666666815678278,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.008344804247220358,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.002018229104578495,
"train_probe_steps_per_second": 0.032
},
{
"calibration/aurc": 0.30654955711846243,
"calibration/batch_distribution_entropy": 0.9495631064195795,
"calibration/buffer_distribution_entropy": 0.8329547447837949,
"calibration/confidence_entropy": 0.5261223218554835,
"calibration/coverage@0%": 0.019454234043486847,
"calibration/coverage@1%": 0.019454234043486847,
"calibration/coverage@10%": 0.12956900775976188,
"calibration/coverage@15%": 0.19017048072492707,
"calibration/coverage@20%": 0.3706057308693712,
"calibration/coverage@25%": 0.4448679341188172,
"calibration/coverage@30%": 0.534387958838888,
"calibration/coverage@5%": 0.05705214527064088,
"calibration/ece": 0.14103971828217957,
"calibration/mean_confidence": 0.6078461706080489,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01371527777777779,
"completions/max_length": 3491.8,
"completions/max_terminated_length": 3491.8,
"completions/mean_length": 699.88525390625,
"completions/mean_terminated_length": 709.6391723632812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 198.0,
"epoch": 0.2519968500393745,
"grad_norm": 0.0027277623303234577,
"learning_rate": 2.5240384615384618e-06,
"loss": -0.0372,
"num_tokens": 201049678.0,
"reward": 0.9713174700737,
"reward_std": 0.1342061474919319,
"rewards/accuracy_reward": 0.6723958373069763,
"rewards/brier_reward": 0.7931610941886902,
"rewards/confidence_uniqueness_reward": 0.9331457614898682,
"rewards/format_reward": 0.9860243082046509,
"rewards/frontier_coverage_0": 0.009938755445182324,
"rewards/frontier_coverage_1": 0.009938755445182324,
"rewards/frontier_coverage_10": 0.009938755445182324,
"rewards/frontier_coverage_15": 0.009938755445182324,
"rewards/frontier_coverage_20": 0.009938755445182324,
"rewards/frontier_coverage_25": 0.009938755445182324,
"rewards/frontier_coverage_5": 0.009938755445182324,
"rewards/frontier_entropy_batch_reward": -0.3151816755533218,
"signal/accuracy_reward/centered_abs_mean": 0.15600043386220933,
"signal/accuracy_reward/group_std_mean": 0.21429117023944855,
"signal/accuracy_reward/group_zero_std_frac": 0.36111111640930177,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9249350309371949,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07800021693110466,
"signal/advantage_abs_mean": 0.7351720333099365,
"signal/advantage_pre_scale_abs_mean": 0.0974724218249321,
"signal/advantage_pre_scale_std": 0.15650634765625,
"signal/advantage_std": 0.9832755327224731,
"signal/brier_reward/centered_abs_mean": 0.13505308628082274,
"signal/brier_reward/group_std_mean": 0.17719172239303588,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16167917251586914,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013505308330059052,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0334759570658207,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05459783673286438,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04044450968503952,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033475957345217465,
"signal/format_reward/centered_abs_mean": 0.02112087607383728,
"signal/format_reward/group_std_mean": 0.03977926895022392,
"signal/format_reward/group_zero_std_frac": 0.8361111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.12590970546007157,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01056043803691864,
"signal/frontier_coverage_0/centered_abs_mean": 0.16347864717245103,
"signal/frontier_coverage_0/group_std_mean": 0.21645487248897552,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0277607012540102,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023377447156235575,
"signal/frontier_coverage_1/centered_abs_mean": 0.16347864717245103,
"signal/frontier_coverage_1/group_std_mean": 0.21645487248897552,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0277607012540102,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023377447156235575,
"signal/frontier_coverage_10/centered_abs_mean": 0.16347864717245103,
"signal/frontier_coverage_10/group_std_mean": 0.21645487248897552,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0277607012540102,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023377447156235575,
"signal/frontier_coverage_15/centered_abs_mean": 0.16347864717245103,
"signal/frontier_coverage_15/group_std_mean": 0.21645487248897552,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0277607012540102,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023377447156235575,
"signal/frontier_coverage_20/centered_abs_mean": 0.16347864717245103,
"signal/frontier_coverage_20/group_std_mean": 0.21645487248897552,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0277607012540102,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023377447156235575,
"signal/frontier_coverage_25/centered_abs_mean": 0.16347864717245103,
"signal/frontier_coverage_25/group_std_mean": 0.21645487248897552,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0277607012540102,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023377447156235575,
"signal/frontier_coverage_5/centered_abs_mean": 0.16347864717245103,
"signal/frontier_coverage_5/group_std_mean": 0.21645487248897552,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0277607012540102,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023377447156235575,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33541697859764097,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40397828817367554,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.407314270734787,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03354169800877571,
"step": 105
},
{
"calibration/aurc": 0.17621004227172601,
"calibration/batch_distribution_entropy": 0.9525836743555572,
"calibration/buffer_distribution_entropy": 0.8424083283894106,
"calibration/confidence_entropy": 0.47573754049226247,
"calibration/coverage@0%": 0.03467249739791169,
"calibration/coverage@1%": 0.03467249739791169,
"calibration/coverage@10%": 0.27721002361313174,
"calibration/coverage@15%": 0.4491039839531317,
"calibration/coverage@20%": 0.554047743934082,
"calibration/coverage@25%": 0.7966191406662304,
"calibration/coverage@30%": 0.9115490951742627,
"calibration/coverage@5%": 0.1632872569158427,
"calibration/ece": 0.13985761805525604,
"calibration/mean_confidence": 0.6064374692157269,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014322916666666675,
"completions/max_length": 3597.0,
"completions/max_terminated_length": 3597.0,
"completions/mean_length": 732.68681640625,
"completions/mean_terminated_length": 743.3656494140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 224.0,
"epoch": 0.2639967000412495,
"grad_norm": 0.003282015211880207,
"learning_rate": 2.6442307692307696e-06,
"loss": -0.0306,
"num_tokens": 212598678.0,
"reward": 0.9867424368858337,
"reward_std": 0.13803467750549317,
"rewards/accuracy_reward": 0.7007812619209289,
"rewards/brier_reward": 0.7855064749717713,
"rewards/confidence_uniqueness_reward": 0.9351040124893188,
"rewards/format_reward": 0.9856770873069763,
"rewards/frontier_coverage_0": -0.00702488785609603,
"rewards/frontier_coverage_1": -0.00702488785609603,
"rewards/frontier_coverage_10": -0.00702488785609603,
"rewards/frontier_coverage_15": -0.00702488785609603,
"rewards/frontier_coverage_20": -0.00702488785609603,
"rewards/frontier_coverage_25": -0.00702488785609603,
"rewards/frontier_coverage_5": -0.00702488785609603,
"rewards/frontier_entropy_batch_reward": -0.27844594717025756,
"signal/accuracy_reward/centered_abs_mean": 0.16480577290058135,
"signal/accuracy_reward/group_std_mean": 0.22608107924461365,
"signal/accuracy_reward/group_zero_std_frac": 0.3305555582046509,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9470046520233154,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08240288645029067,
"signal/advantage_abs_mean": 0.7354275345802307,
"signal/advantage_pre_scale_abs_mean": 0.10117111206054688,
"signal/advantage_pre_scale_std": 0.16214256584644318,
"signal/advantage_std": 0.9833307504653931,
"signal/brier_reward/centered_abs_mean": 0.1462298572063446,
"signal/brier_reward/group_std_mean": 0.18954940140247345,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16818097829818726,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014622985012829303,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03436548411846161,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05312940776348114,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0393658496439457,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034365484956651927,
"signal/format_reward/centered_abs_mean": 0.02219509556889534,
"signal/format_reward/group_std_mean": 0.03848949335515499,
"signal/format_reward/group_zero_std_frac": 0.85,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1266273118555546,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01109754778444767,
"signal/frontier_coverage_0/centered_abs_mean": 0.18365023136138917,
"signal/frontier_coverage_0/group_std_mean": 0.24325567483901978,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030299390852451324,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026261982042342425,
"signal/frontier_coverage_1/centered_abs_mean": 0.18365023136138917,
"signal/frontier_coverage_1/group_std_mean": 0.24325567483901978,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030299390852451324,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026261982042342425,
"signal/frontier_coverage_10/centered_abs_mean": 0.18365023136138917,
"signal/frontier_coverage_10/group_std_mean": 0.24325567483901978,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.030299390852451324,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026261982042342425,
"signal/frontier_coverage_15/centered_abs_mean": 0.18365023136138917,
"signal/frontier_coverage_15/group_std_mean": 0.24325567483901978,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030299390852451324,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026261982042342425,
"signal/frontier_coverage_20/centered_abs_mean": 0.18365023136138917,
"signal/frontier_coverage_20/group_std_mean": 0.24325567483901978,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030299390852451324,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026261982042342425,
"signal/frontier_coverage_25/centered_abs_mean": 0.18365023136138917,
"signal/frontier_coverage_25/group_std_mean": 0.24325567483901978,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030299390852451324,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026261982042342425,
"signal/frontier_coverage_5/centered_abs_mean": 0.18365023136138917,
"signal/frontier_coverage_5/group_std_mean": 0.24325567483901978,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030299390852451324,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026261982042342425,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3263388693332672,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3969201326370239,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.37608352303504944,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03263388872146607,
"step": 110
},
{
"calibration/aurc": 0.27223667379972827,
"calibration/batch_distribution_entropy": 0.9693905636795204,
"calibration/buffer_distribution_entropy": 0.8511333885278336,
"calibration/confidence_entropy": 0.5051552336993171,
"calibration/coverage@0%": 0.018523467203869926,
"calibration/coverage@1%": 0.018523467203869926,
"calibration/coverage@10%": 0.08575949309203403,
"calibration/coverage@15%": 0.19812398692634017,
"calibration/coverage@20%": 0.41265111135322263,
"calibration/coverage@25%": 0.5600856339560352,
"calibration/coverage@30%": 0.6692724289073688,
"calibration/coverage@5%": 0.023828506991668337,
"calibration/ece": 0.156115622125503,
"calibration/mean_confidence": 0.5472786349034584,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017795138888888885,
"completions/max_length": 3806.2,
"completions/max_terminated_length": 3806.2,
"completions/mean_length": 723.3151977539062,
"completions/mean_terminated_length": 736.4974487304687,
"completions/min_length": 0.0,
"completions/min_terminated_length": 207.0,
"epoch": 0.27599655004312446,
"grad_norm": 0.002938604913651943,
"learning_rate": 2.7644230769230775e-06,
"loss": -0.0481,
"num_tokens": 224010469.0,
"reward": 0.9614550828933716,
"reward_std": 0.14364836513996124,
"rewards/accuracy_reward": 0.6545138955116272,
"rewards/brier_reward": 0.7815379858016968,
"rewards/confidence_uniqueness_reward": 0.9305242776870728,
"rewards/format_reward": 0.9817708492279053,
"rewards/frontier_coverage_0": 0.014907448133453727,
"rewards/frontier_coverage_1": 0.014907448133453727,
"rewards/frontier_coverage_10": 0.014907448133453727,
"rewards/frontier_coverage_15": 0.014907448133453727,
"rewards/frontier_coverage_20": 0.014907448133453727,
"rewards/frontier_coverage_25": 0.014907448133453727,
"rewards/frontier_coverage_5": 0.014907448133453727,
"rewards/frontier_entropy_batch_reward": -0.29385790824890134,
"signal/accuracy_reward/centered_abs_mean": 0.16838107705116273,
"signal/accuracy_reward/group_std_mean": 0.21618228256702424,
"signal/accuracy_reward/group_zero_std_frac": 0.4,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9831504344940185,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08419053852558137,
"signal/advantage_abs_mean": 0.7567147850990296,
"signal/advantage_pre_scale_abs_mean": 0.10796858966350556,
"signal/advantage_pre_scale_std": 0.1700371354818344,
"signal/advantage_std": 0.9833152413368225,
"signal/brier_reward/centered_abs_mean": 0.1507797509431839,
"signal/brier_reward/group_std_mean": 0.1942868411540985,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1758878141641617,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015077975019812584,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.040679140388965605,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06572701260447503,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.047537700086832044,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004067914048209787,
"signal/format_reward/centered_abs_mean": 0.02918836809694767,
"signal/format_reward/group_std_mean": 0.052632787078619,
"signal/format_reward/group_zero_std_frac": 0.7944444417953491,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1706949606537819,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.014594184048473836,
"signal/frontier_coverage_0/centered_abs_mean": 0.18486830592155457,
"signal/frontier_coverage_0/group_std_mean": 0.24456796944141387,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03077918142080307,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026436167769134044,
"signal/frontier_coverage_1/centered_abs_mean": 0.18486830592155457,
"signal/frontier_coverage_1/group_std_mean": 0.24456796944141387,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03077918142080307,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026436167769134044,
"signal/frontier_coverage_10/centered_abs_mean": 0.18486830592155457,
"signal/frontier_coverage_10/group_std_mean": 0.24456796944141387,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03077918142080307,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026436167769134044,
"signal/frontier_coverage_15/centered_abs_mean": 0.18486830592155457,
"signal/frontier_coverage_15/group_std_mean": 0.24456796944141387,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03077918142080307,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026436167769134044,
"signal/frontier_coverage_20/centered_abs_mean": 0.18486830592155457,
"signal/frontier_coverage_20/group_std_mean": 0.24456796944141387,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03077918142080307,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026436167769134044,
"signal/frontier_coverage_25/centered_abs_mean": 0.18486830592155457,
"signal/frontier_coverage_25/group_std_mean": 0.24456796944141387,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03077918142080307,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026436167769134044,
"signal/frontier_coverage_5/centered_abs_mean": 0.18486830592155457,
"signal/frontier_coverage_5/group_std_mean": 0.24456796944141387,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03077918142080307,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026436167769134044,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3445523798465729,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41339404582977296,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4024454474449158,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03445524051785469,
"step": 115
},
{
"calibration/aurc": 0.29549355034028324,
"calibration/batch_distribution_entropy": 0.9595540967587365,
"calibration/buffer_distribution_entropy": 0.8601922185063469,
"calibration/confidence_entropy": 0.4986714711203833,
"calibration/coverage@0%": 0.031789573354127215,
"calibration/coverage@1%": 0.031789573354127215,
"calibration/coverage@10%": 0.1535061824663028,
"calibration/coverage@15%": 0.36279937618358027,
"calibration/coverage@20%": 0.4954773309568898,
"calibration/coverage@25%": 0.5412442909658015,
"calibration/coverage@30%": 0.5628272251308901,
"calibration/coverage@5%": 0.12540659463072296,
"calibration/ece": 0.18322134358492953,
"calibration/mean_confidence": 0.5513955572073772,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012239583333333326,
"completions/max_length": 3744.0,
"completions/max_terminated_length": 3744.0,
"completions/mean_length": 705.9643310546875,
"completions/mean_terminated_length": 714.6814453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 219.2,
"epoch": 0.28799640004499943,
"grad_norm": 0.003744702087715268,
"learning_rate": 2.8846153846153845e-06,
"loss": -0.0343,
"num_tokens": 235225034.0,
"reward": 0.9743936657905579,
"reward_std": 0.12764326333999634,
"rewards/accuracy_reward": 0.669444453716278,
"rewards/brier_reward": 0.7871620059013367,
"rewards/confidence_uniqueness_reward": 0.9371712327003479,
"rewards/format_reward": 0.9875868082046508,
"rewards/frontier_coverage_0": 0.018821701966226102,
"rewards/frontier_coverage_1": 0.018821701966226102,
"rewards/frontier_coverage_10": 0.018821701966226102,
"rewards/frontier_coverage_15": 0.018821701966226102,
"rewards/frontier_coverage_20": 0.018821701966226102,
"rewards/frontier_coverage_25": 0.018821701966226102,
"rewards/frontier_coverage_5": 0.018821701966226102,
"rewards/frontier_entropy_batch_reward": -0.28439350724220275,
"signal/accuracy_reward/centered_abs_mean": 0.15498046576976776,
"signal/accuracy_reward/group_std_mean": 0.207235848903656,
"signal/accuracy_reward/group_zero_std_frac": 0.397222226858139,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9903906464576722,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07749023288488388,
"signal/advantage_abs_mean": 0.7500714540481568,
"signal/advantage_pre_scale_abs_mean": 0.09475149214267731,
"signal/advantage_pre_scale_std": 0.1524016410112381,
"signal/advantage_std": 0.9832012891769409,
"signal/brier_reward/centered_abs_mean": 0.1421737015247345,
"signal/brier_reward/group_std_mean": 0.18224007189273833,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18270126581192017,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014217370934784413,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031325727701187134,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05049348995089531,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04027099572122097,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031325726769864557,
"signal/format_reward/centered_abs_mean": 0.01992729976773262,
"signal/format_reward/group_std_mean": 0.03685051053762436,
"signal/format_reward/group_zero_std_frac": 0.8472222208976745,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.12836382985115052,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00996364988386631,
"signal/frontier_coverage_0/centered_abs_mean": 0.18434422910213472,
"signal/frontier_coverage_0/group_std_mean": 0.24150433838367463,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03388373181223869,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026361226569861173,
"signal/frontier_coverage_1/centered_abs_mean": 0.18434422910213472,
"signal/frontier_coverage_1/group_std_mean": 0.24150433838367463,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03388373181223869,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026361226569861173,
"signal/frontier_coverage_10/centered_abs_mean": 0.18434422910213472,
"signal/frontier_coverage_10/group_std_mean": 0.24150433838367463,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03388373181223869,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026361226569861173,
"signal/frontier_coverage_15/centered_abs_mean": 0.18434422910213472,
"signal/frontier_coverage_15/group_std_mean": 0.24150433838367463,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03388373181223869,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026361226569861173,
"signal/frontier_coverage_20/centered_abs_mean": 0.18434422910213472,
"signal/frontier_coverage_20/group_std_mean": 0.24150433838367463,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03388373181223869,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026361226569861173,
"signal/frontier_coverage_25/centered_abs_mean": 0.18434422910213472,
"signal/frontier_coverage_25/group_std_mean": 0.24150433838367463,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03388373181223869,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026361226569861173,
"signal/frontier_coverage_5/centered_abs_mean": 0.18434422910213472,
"signal/frontier_coverage_5/group_std_mean": 0.24150433838367463,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03388373181223869,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026361226569861173,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3292065501213074,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39916940331459044,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.42209590673446656,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03292065560817718,
"step": 120
},
{
"calibration/aurc": 0.15347697896922433,
"calibration/batch_distribution_entropy": 0.9279338995247632,
"calibration/buffer_distribution_entropy": 0.8692291819371754,
"calibration/confidence_entropy": 0.4797209891546649,
"calibration/coverage@0%": 0.029773499567166052,
"calibration/coverage@1%": 0.029773499567166052,
"calibration/coverage@10%": 0.432970158601062,
"calibration/coverage@15%": 0.5448622348892085,
"calibration/coverage@20%": 0.6590509926674206,
"calibration/coverage@25%": 0.8772514419071686,
"calibration/coverage@30%": 0.957779339334832,
"calibration/coverage@5%": 0.29304183072469436,
"calibration/ece": 0.1280785228841864,
"calibration/mean_confidence": 0.6281016420313883,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3060.8,
"completions/max_terminated_length": 3060.8,
"completions/mean_length": 678.2833251953125,
"completions/mean_terminated_length": 686.427001953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 199.6,
"epoch": 0.2999962500468744,
"grad_norm": 0.002649526810273528,
"learning_rate": 3.0048076923076923e-06,
"loss": -0.0264,
"num_tokens": 246156522.0,
"reward": 0.9789023041725159,
"reward_std": 0.13094690144062043,
"rewards/accuracy_reward": 0.6890625,
"rewards/brier_reward": 0.8089505910873414,
"rewards/confidence_uniqueness_reward": 0.9324007034301758,
"rewards/format_reward": 0.98828125,
"rewards/frontier_coverage_0": 0.020671736821532248,
"rewards/frontier_coverage_1": 0.020671736821532248,
"rewards/frontier_coverage_10": 0.020671736821532248,
"rewards/frontier_coverage_15": 0.020671736821532248,
"rewards/frontier_coverage_20": 0.020671736821532248,
"rewards/frontier_coverage_25": 0.020671736821532248,
"rewards/frontier_coverage_5": 0.020671736821532248,
"rewards/frontier_entropy_batch_reward": -0.35973963141441345,
"signal/accuracy_reward/centered_abs_mean": 0.16453993022441865,
"signal/accuracy_reward/group_std_mean": 0.21474986672401428,
"signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0882207155227661,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08226996511220933,
"signal/advantage_abs_mean": 0.7664442896842957,
"signal/advantage_pre_scale_abs_mean": 0.10022006183862686,
"signal/advantage_pre_scale_std": 0.1579681694507599,
"signal/advantage_std": 0.9831624984741211,
"signal/brier_reward/centered_abs_mean": 0.13348801881074907,
"signal/brier_reward/group_std_mean": 0.17074580788612365,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17603938281536102,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013348801992833614,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.033481241390109065,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04822203889489174,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04422098770737648,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033481243066489697,
"signal/format_reward/centered_abs_mean": 0.01887478269636631,
"signal/format_reward/group_std_mean": 0.030514462664723397,
"signal/format_reward/group_zero_std_frac": 0.8888888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.12388529032468795,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009437391348183156,
"signal/frontier_coverage_0/centered_abs_mean": 0.16123655140399934,
"signal/frontier_coverage_0/group_std_mean": 0.21205799281597137,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030381349101662635,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002305682725273073,
"signal/frontier_coverage_1/centered_abs_mean": 0.16123655140399934,
"signal/frontier_coverage_1/group_std_mean": 0.21205799281597137,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030381349101662635,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002305682725273073,
"signal/frontier_coverage_10/centered_abs_mean": 0.16123655140399934,
"signal/frontier_coverage_10/group_std_mean": 0.21205799281597137,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.030381349101662635,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002305682725273073,
"signal/frontier_coverage_15/centered_abs_mean": 0.16123655140399934,
"signal/frontier_coverage_15/group_std_mean": 0.21205799281597137,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030381349101662635,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002305682725273073,
"signal/frontier_coverage_20/centered_abs_mean": 0.16123655140399934,
"signal/frontier_coverage_20/group_std_mean": 0.21205799281597137,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030381349101662635,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002305682725273073,
"signal/frontier_coverage_25/centered_abs_mean": 0.16123655140399934,
"signal/frontier_coverage_25/group_std_mean": 0.21205799281597137,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030381349101662635,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002305682725273073,
"signal/frontier_coverage_5/centered_abs_mean": 0.16123655140399934,
"signal/frontier_coverage_5/group_std_mean": 0.21205799281597137,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030381349101662635,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002305682725273073,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35917252898216245,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4227446138858795,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.47557589411735535,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03591725528240204,
"step": 125
},
{
"calibration/aurc": 0.24075648361334556,
"calibration/batch_distribution_entropy": 0.9367937132872782,
"calibration/buffer_distribution_entropy": 0.8743558081549685,
"calibration/confidence_entropy": 0.48164570637542764,
"calibration/coverage@0%": 0.00796905532697007,
"calibration/coverage@1%": 0.00796905532697007,
"calibration/coverage@10%": 0.1891113738103692,
"calibration/coverage@15%": 0.2610659593280283,
"calibration/coverage@20%": 0.43392440666402105,
"calibration/coverage@25%": 0.5412922742747343,
"calibration/coverage@30%": 0.6796504855991146,
"calibration/coverage@5%": 0.00796905532697007,
"calibration/ece": 0.11142523508126075,
"calibration/mean_confidence": 0.6235650795376477,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.024913194444444443,
"completions/max_length": 3119.8,
"completions/max_terminated_length": 3119.8,
"completions/mean_length": 677.76962890625,
"completions/mean_terminated_length": 695.1375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 182.0,
"epoch": 0.3119961000487494,
"grad_norm": 0.00250594737008214,
"learning_rate": 3.125e-06,
"loss": -0.0618,
"num_tokens": 257089228.0,
"reward": 0.9503241419792176,
"reward_std": 0.1572349935770035,
"rewards/accuracy_reward": 0.6512152791023255,
"rewards/brier_reward": 0.7802307844161988,
"rewards/confidence_uniqueness_reward": 0.9184599637985229,
"rewards/format_reward": 0.9747395873069763,
"rewards/frontier_coverage_0": 0.020750408340245486,
"rewards/frontier_coverage_1": 0.020750408340245486,
"rewards/frontier_coverage_10": 0.020750408340245486,
"rewards/frontier_coverage_15": 0.020750408340245486,
"rewards/frontier_coverage_20": 0.020750408340245486,
"rewards/frontier_coverage_25": 0.020750408340245486,
"rewards/frontier_coverage_5": 0.020750408340245486,
"rewards/frontier_entropy_batch_reward": -0.34599482715129853,
"signal/accuracy_reward/centered_abs_mean": 0.18407118022441865,
"signal/accuracy_reward/group_std_mean": 0.2376004248857498,
"signal/accuracy_reward/group_zero_std_frac": 0.3500000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0418458223342895,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09203559011220933,
"signal/advantage_abs_mean": 0.7402811050415039,
"signal/advantage_pre_scale_abs_mean": 0.11711515635251998,
"signal/advantage_pre_scale_std": 0.18535294532775878,
"signal/advantage_std": 0.9833332061767578,
"signal/brier_reward/centered_abs_mean": 0.15127059519290925,
"signal/brier_reward/group_std_mean": 0.19668636918067933,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1720490723848343,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015127059258520602,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05016105026006699,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07742284685373306,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05758904665708542,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005016105063259601,
"signal/format_reward/centered_abs_mean": 0.03739691823720932,
"signal/format_reward/group_std_mean": 0.06282595321536064,
"signal/format_reward/group_zero_std_frac": 0.7638888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.21188210248947142,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01869845911860466,
"signal/frontier_coverage_0/centered_abs_mean": 0.17104237377643586,
"signal/frontier_coverage_0/group_std_mean": 0.22613960802555083,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02733422853052616,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024459057254716753,
"signal/frontier_coverage_1/centered_abs_mean": 0.17104237377643586,
"signal/frontier_coverage_1/group_std_mean": 0.22613960802555083,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02733422853052616,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024459057254716753,
"signal/frontier_coverage_10/centered_abs_mean": 0.17104237377643586,
"signal/frontier_coverage_10/group_std_mean": 0.22613960802555083,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02733422853052616,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024459057254716753,
"signal/frontier_coverage_15/centered_abs_mean": 0.17104237377643586,
"signal/frontier_coverage_15/group_std_mean": 0.22613960802555083,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02733422853052616,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024459057254716753,
"signal/frontier_coverage_20/centered_abs_mean": 0.17104237377643586,
"signal/frontier_coverage_20/group_std_mean": 0.22613960802555083,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02733422853052616,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024459057254716753,
"signal/frontier_coverage_25/centered_abs_mean": 0.17104237377643586,
"signal/frontier_coverage_25/group_std_mean": 0.22613960802555083,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02733422853052616,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024459057254716753,
"signal/frontier_coverage_5/centered_abs_mean": 0.17104237377643586,
"signal/frontier_coverage_5/group_std_mean": 0.22613960802555083,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02733422853052616,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024459057254716753,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3354610979557037,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4053588271141052,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.38607336282730104,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03354610875248909,
"step": 130
},
{
"calibration/aurc": 0.20758602314082264,
"calibration/batch_distribution_entropy": 0.9303398449193102,
"calibration/buffer_distribution_entropy": 0.879867407291882,
"calibration/confidence_entropy": 0.5022852646546221,
"calibration/coverage@0%": 0.0857999849518162,
"calibration/coverage@1%": 0.1547294888682653,
"calibration/coverage@10%": 0.3439933023262488,
"calibration/coverage@15%": 0.4155660301093286,
"calibration/coverage@20%": 0.5031820825736764,
"calibration/coverage@25%": 0.5919049119490418,
"calibration/coverage@30%": 0.6689481417605041,
"calibration/coverage@5%": 0.2552832986170347,
"calibration/ece": 0.19673193883504392,
"calibration/mean_confidence": 0.5709542997574626,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03003472222222221,
"completions/max_length": 3558.6,
"completions/max_terminated_length": 3558.6,
"completions/mean_length": 646.0067749023438,
"completions/mean_terminated_length": 665.8999633789062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 185.0,
"epoch": 0.32399595005062437,
"grad_norm": 0.0025195139460265636,
"learning_rate": 3.245192307692308e-06,
"loss": -0.0755,
"num_tokens": 267624250.0,
"reward": 0.9493022203445435,
"reward_std": 0.16051433086395264,
"rewards/accuracy_reward": 0.6564236164093018,
"rewards/brier_reward": 0.7611977815628052,
"rewards/confidence_uniqueness_reward": 0.9178310751914978,
"rewards/format_reward": 0.9690972208976746,
"rewards/frontier_coverage_0": 0.002158835157752037,
"rewards/frontier_coverage_1": 0.002158835157752037,
"rewards/frontier_coverage_10": 0.002158835157752037,
"rewards/frontier_coverage_15": 0.002158835157752037,
"rewards/frontier_coverage_20": 0.002158835157752037,
"rewards/frontier_coverage_25": 0.002158835157752037,
"rewards/frontier_coverage_5": 0.002158835157752037,
"rewards/frontier_entropy_batch_reward": -0.31577223539352417,
"signal/accuracy_reward/centered_abs_mean": 0.17967664897441865,
"signal/accuracy_reward/group_std_mean": 0.23678012788295746,
"signal/accuracy_reward/group_zero_std_frac": 0.3250000089406967,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9074344158172607,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08983832448720933,
"signal/advantage_abs_mean": 0.7302809596061707,
"signal/advantage_pre_scale_abs_mean": 0.11728082150220871,
"signal/advantage_pre_scale_std": 0.18725473880767823,
"signal/advantage_std": 0.9834550261497498,
"signal/brier_reward/centered_abs_mean": 0.15965774655342102,
"signal/brier_reward/group_std_mean": 0.20320362746715545,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1627894550561905,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01596577502787113,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.057780878990888594,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09125057309865951,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05861600786447525,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005778087861835956,
"signal/format_reward/centered_abs_mean": 0.04900173619389534,
"signal/format_reward/group_std_mean": 0.08135172799229622,
"signal/format_reward/group_zero_std_frac": 0.6972222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.24763497412204744,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.02450086809694767,
"signal/frontier_coverage_0/centered_abs_mean": 0.2009547770023346,
"signal/frontier_coverage_0/group_std_mean": 0.2594577521085739,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.029287652298808097,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002873653150163591,
"signal/frontier_coverage_1/centered_abs_mean": 0.2009547770023346,
"signal/frontier_coverage_1/group_std_mean": 0.2594577521085739,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.029287652298808097,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002873653150163591,
"signal/frontier_coverage_10/centered_abs_mean": 0.2009547770023346,
"signal/frontier_coverage_10/group_std_mean": 0.2594577521085739,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.029287652298808097,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002873653150163591,
"signal/frontier_coverage_15/centered_abs_mean": 0.2009547770023346,
"signal/frontier_coverage_15/group_std_mean": 0.2594577521085739,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.029287652298808097,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002873653150163591,
"signal/frontier_coverage_20/centered_abs_mean": 0.2009547770023346,
"signal/frontier_coverage_20/group_std_mean": 0.2594577521085739,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.029287652298808097,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002873653150163591,
"signal/frontier_coverage_25/centered_abs_mean": 0.2009547770023346,
"signal/frontier_coverage_25/group_std_mean": 0.2594577521085739,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029287652298808097,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002873653150163591,
"signal/frontier_coverage_5/centered_abs_mean": 0.2009547770023346,
"signal/frontier_coverage_5/group_std_mean": 0.2594577521085739,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.029287652298808097,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002873653150163591,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.338104647397995,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4068137645721436,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.34629579186439513,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033810464292764665,
"step": 135
},
{
"calibration/aurc": 0.18906472468836222,
"calibration/batch_distribution_entropy": 0.8814205304070386,
"calibration/buffer_distribution_entropy": 0.8831141026061242,
"calibration/confidence_entropy": 0.4783131070096413,
"calibration/coverage@0%": 0.007956662839798317,
"calibration/coverage@1%": 0.007956662839798317,
"calibration/coverage@10%": 0.19745358536747154,
"calibration/coverage@15%": 0.3055121954284983,
"calibration/coverage@20%": 0.6023303658836751,
"calibration/coverage@25%": 0.8259447416874842,
"calibration/coverage@30%": 0.9512643107725396,
"calibration/coverage@5%": 0.05874200315393445,
"calibration/ece": 0.09537625207937526,
"calibration/mean_confidence": 0.6794771410097296,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.028732638888888905,
"completions/max_length": 3704.2,
"completions/max_terminated_length": 3704.2,
"completions/mean_length": 659.917626953125,
"completions/mean_terminated_length": 679.4125610351563,
"completions/min_length": 0.0,
"completions/min_terminated_length": 190.2,
"epoch": 0.33599580005249935,
"grad_norm": 0.0030611190013587475,
"learning_rate": 3.365384615384616e-06,
"loss": -0.078,
"num_tokens": 278330725.0,
"reward": 0.942699670791626,
"reward_std": 0.16864260733127595,
"rewards/accuracy_reward": 0.6534722328186036,
"rewards/brier_reward": 0.7861078023910523,
"rewards/confidence_uniqueness_reward": 0.9161162137985229,
"rewards/format_reward": 0.9706597089767456,
"rewards/frontier_coverage_0": 0.02347471434623003,
"rewards/frontier_coverage_1": 0.02347471434623003,
"rewards/frontier_coverage_10": 0.02347471434623003,
"rewards/frontier_coverage_15": 0.02347471434623003,
"rewards/frontier_coverage_20": 0.02347471434623003,
"rewards/frontier_coverage_25": 0.02347471434623003,
"rewards/frontier_coverage_5": 0.02347471434623003,
"rewards/frontier_entropy_batch_reward": -0.41938512921333315,
"signal/accuracy_reward/centered_abs_mean": 0.17179904580116273,
"signal/accuracy_reward/group_std_mean": 0.22940363883972167,
"signal/accuracy_reward/group_zero_std_frac": 0.3444444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9489439964294434,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08589952290058137,
"signal/advantage_abs_mean": 0.7321731686592102,
"signal/advantage_pre_scale_abs_mean": 0.12239420711994171,
"signal/advantage_pre_scale_std": 0.19716466069221497,
"signal/advantage_std": 0.9833737254142761,
"signal/brier_reward/centered_abs_mean": 0.1507904589176178,
"signal/brier_reward/group_std_mean": 0.1969256341457367,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16663262248039246,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015079045854508876,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06016582772135735,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09862598031759262,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0665690153837204,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006016582809388637,
"signal/format_reward/centered_abs_mean": 0.04906684011220932,
"signal/format_reward/group_std_mean": 0.08623605370521545,
"signal/format_reward/group_zero_std_frac": 0.669444453716278,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.26951748728752134,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.02453342005610466,
"signal/frontier_coverage_0/centered_abs_mean": 0.1365742042660713,
"signal/frontier_coverage_0/group_std_mean": 0.185016992688179,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02126149646937847,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019530110061168672,
"signal/frontier_coverage_1/centered_abs_mean": 0.1365742042660713,
"signal/frontier_coverage_1/group_std_mean": 0.185016992688179,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02126149646937847,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019530110061168672,
"signal/frontier_coverage_10/centered_abs_mean": 0.1365742042660713,
"signal/frontier_coverage_10/group_std_mean": 0.185016992688179,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02126149646937847,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019530110061168672,
"signal/frontier_coverage_15/centered_abs_mean": 0.1365742042660713,
"signal/frontier_coverage_15/group_std_mean": 0.185016992688179,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02126149646937847,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019530110061168672,
"signal/frontier_coverage_20/centered_abs_mean": 0.1365742042660713,
"signal/frontier_coverage_20/group_std_mean": 0.185016992688179,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02126149646937847,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019530110061168672,
"signal/frontier_coverage_25/centered_abs_mean": 0.1365742042660713,
"signal/frontier_coverage_25/group_std_mean": 0.185016992688179,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02126149646937847,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019530110061168672,
"signal/frontier_coverage_5/centered_abs_mean": 0.1365742042660713,
"signal/frontier_coverage_5/group_std_mean": 0.185016992688179,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02126149646937847,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019530110061168672,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3603252172470093,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4246739625930786,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4002987802028656,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.036032522842288014,
"step": 140
},
{
"calibration/aurc": 0.15697987006755532,
"calibration/batch_distribution_entropy": 0.9844253334200659,
"calibration/buffer_distribution_entropy": 0.8905394420507413,
"calibration/confidence_entropy": 0.48629962729987214,
"calibration/coverage@0%": 0.037176911659670284,
"calibration/coverage@1%": 0.037176911659670284,
"calibration/coverage@10%": 0.44460642204611933,
"calibration/coverage@15%": 0.5577586774421538,
"calibration/coverage@20%": 0.6965503392766921,
"calibration/coverage@25%": 0.790469339916671,
"calibration/coverage@30%": 0.8630696403110196,
"calibration/coverage@5%": 0.26364939990934594,
"calibration/ece": 0.16339695802470103,
"calibration/mean_confidence": 0.5240559906457406,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.025868055555555512,
"completions/max_length": 3513.4,
"completions/max_terminated_length": 3513.4,
"completions/mean_length": 654.6441162109375,
"completions/mean_terminated_length": 672.145263671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 214.4,
"epoch": 0.34799565005437433,
"grad_norm": 0.0024486000183969736,
"learning_rate": 3.4855769230769233e-06,
"loss": -0.0728,
"num_tokens": 288936833.0,
"reward": 0.981769073009491,
"reward_std": 0.14131342768669128,
"rewards/accuracy_reward": 0.7016493082046509,
"rewards/brier_reward": 0.7855762124061585,
"rewards/confidence_uniqueness_reward": 0.9288261771202088,
"rewards/format_reward": 0.9740451335906982,
"rewards/frontier_coverage_0": 0.0006571165286004543,
"rewards/frontier_coverage_1": 0.0006571165286004543,
"rewards/frontier_coverage_10": 0.0006571165286004543,
"rewards/frontier_coverage_15": 0.0006571165286004543,
"rewards/frontier_coverage_20": 0.0006571165286004543,
"rewards/frontier_coverage_25": 0.0006571165286004543,
"rewards/frontier_coverage_5": 0.0006571165286004543,
"rewards/frontier_entropy_batch_reward": -0.2758418798446655,
"signal/accuracy_reward/centered_abs_mean": 0.14620768427848815,
"signal/accuracy_reward/group_std_mean": 0.1999648928642273,
"signal/accuracy_reward/group_zero_std_frac": 0.40555556416511535,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.883146071434021,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07310384213924408,
"signal/advantage_abs_mean": 0.7370108008384705,
"signal/advantage_pre_scale_abs_mean": 0.10200686007738113,
"signal/advantage_pre_scale_std": 0.1717151403427124,
"signal/advantage_std": 0.9832675337791443,
"signal/brier_reward/centered_abs_mean": 0.14895096719264983,
"signal/brier_reward/group_std_mean": 0.19092276692390442,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1803687483072281,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014895097352564335,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04965474233031273,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07973605394363403,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.059922744333744046,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004965474270284176,
"signal/format_reward/centered_abs_mean": 0.04082573838531971,
"signal/format_reward/group_std_mean": 0.06935827732086182,
"signal/format_reward/group_zero_std_frac": 0.7416666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2456405758857727,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.020412869192659854,
"signal/frontier_coverage_0/centered_abs_mean": 0.1881540447473526,
"signal/frontier_coverage_0/group_std_mean": 0.24492749869823455,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.032554148137569426,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026906028389930723,
"signal/frontier_coverage_1/centered_abs_mean": 0.1881540447473526,
"signal/frontier_coverage_1/group_std_mean": 0.24492749869823455,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.032554148137569426,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026906028389930723,
"signal/frontier_coverage_10/centered_abs_mean": 0.1881540447473526,
"signal/frontier_coverage_10/group_std_mean": 0.24492749869823455,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.032554148137569426,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026906028389930723,
"signal/frontier_coverage_15/centered_abs_mean": 0.1881540447473526,
"signal/frontier_coverage_15/group_std_mean": 0.24492749869823455,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.032554148137569426,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026906028389930723,
"signal/frontier_coverage_20/centered_abs_mean": 0.1881540447473526,
"signal/frontier_coverage_20/group_std_mean": 0.24492749869823455,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.032554148137569426,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026906028389930723,
"signal/frontier_coverage_25/centered_abs_mean": 0.1881540447473526,
"signal/frontier_coverage_25/group_std_mean": 0.24492749869823455,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.032554148137569426,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026906028389930723,
"signal/frontier_coverage_5/centered_abs_mean": 0.1881540447473526,
"signal/frontier_coverage_5/group_std_mean": 0.24492749869823455,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032554148137569426,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026906028389930723,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3361767590045929,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4066691756248474,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.40922998189926146,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0336176760494709,
"step": 145
},
{
"calibration/aurc": 0.1722534660782087,
"calibration/batch_distribution_entropy": 0.9496003073139374,
"calibration/buffer_distribution_entropy": 0.9066922407574388,
"calibration/confidence_entropy": 0.48104717729150837,
"calibration/coverage@0%": 0.042072927041753645,
"calibration/coverage@1%": 0.042072927041753645,
"calibration/coverage@10%": 0.3972435195237208,
"calibration/coverage@15%": 0.5266129172586447,
"calibration/coverage@20%": 0.6327406117153822,
"calibration/coverage@25%": 0.7269504438656763,
"calibration/coverage@30%": 0.7957203851288851,
"calibration/coverage@5%": 0.17130425323208068,
"calibration/ece": 0.17626248571877973,
"calibration/mean_confidence": 0.5680134667175254,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015190972222222232,
"completions/max_length": 3418.2,
"completions/max_terminated_length": 3418.2,
"completions/mean_length": 712.5080932617187,
"completions/mean_terminated_length": 723.4612670898438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 183.6,
"epoch": 0.3599955000562493,
"grad_norm": 0.0022300081327557564,
"learning_rate": 3.605769230769231e-06,
"loss": -0.0333,
"num_tokens": 300255262.0,
"reward": 0.9809937477111816,
"reward_std": 0.13701283782720566,
"rewards/accuracy_reward": 0.6916666626930237,
"rewards/brier_reward": 0.804995310306549,
"rewards/confidence_uniqueness_reward": 0.9330321192741394,
"rewards/format_reward": 0.9847222089767456,
"rewards/frontier_coverage_0": 0.020913063362240792,
"rewards/frontier_coverage_1": 0.020913063362240792,
"rewards/frontier_coverage_10": 0.020913063362240792,
"rewards/frontier_coverage_15": 0.020913063362240792,
"rewards/frontier_coverage_20": 0.020913063362240792,
"rewards/frontier_coverage_25": 0.020913063362240792,
"rewards/frontier_coverage_5": 0.020913063362240792,
"rewards/frontier_entropy_batch_reward": -0.33096869885921476,
"signal/accuracy_reward/centered_abs_mean": 0.16299913227558135,
"signal/accuracy_reward/group_std_mean": 0.21699598133563996,
"signal/accuracy_reward/group_zero_std_frac": 0.38333333730697633,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.002332079410553,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08149956613779068,
"signal/advantage_abs_mean": 0.7397428989410401,
"signal/advantage_pre_scale_abs_mean": 0.09951845407485962,
"signal/advantage_pre_scale_std": 0.16154283583164214,
"signal/advantage_std": 0.9832315325737,
"signal/brier_reward/centered_abs_mean": 0.14122247993946074,
"signal/brier_reward/group_std_mean": 0.1847561001777649,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17594003081321716,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014122248627245426,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03645235523581505,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06082368865609169,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.045828116685152055,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003645235579460859,
"signal/format_reward/centered_abs_mean": 0.02452256940305233,
"signal/format_reward/group_std_mean": 0.0467927910387516,
"signal/format_reward/group_zero_std_frac": 0.8027777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.153493233025074,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012261284701526164,
"signal/frontier_coverage_0/centered_abs_mean": 0.1739561676979065,
"signal/frontier_coverage_0/group_std_mean": 0.23288733959198,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030741161853075027,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002487573237158358,
"signal/frontier_coverage_1/centered_abs_mean": 0.1739561676979065,
"signal/frontier_coverage_1/group_std_mean": 0.23288733959198,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030741161853075027,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002487573237158358,
"signal/frontier_coverage_10/centered_abs_mean": 0.1739561676979065,
"signal/frontier_coverage_10/group_std_mean": 0.23288733959198,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.030741161853075027,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002487573237158358,
"signal/frontier_coverage_15/centered_abs_mean": 0.1739561676979065,
"signal/frontier_coverage_15/group_std_mean": 0.23288733959198,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030741161853075027,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002487573237158358,
"signal/frontier_coverage_20/centered_abs_mean": 0.1739561676979065,
"signal/frontier_coverage_20/group_std_mean": 0.23288733959198,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030741161853075027,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002487573237158358,
"signal/frontier_coverage_25/centered_abs_mean": 0.1739561676979065,
"signal/frontier_coverage_25/group_std_mean": 0.23288733959198,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030741161853075027,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002487573237158358,
"signal/frontier_coverage_5/centered_abs_mean": 0.1739561676979065,
"signal/frontier_coverage_5/group_std_mean": 0.23288733959198,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030741161853075027,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002487573237158358,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33918721675872804,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40726816058158877,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.42680872082710264,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033918721601367,
"step": 150
},
{
"epoch": 0.3599955000562493,
"eval_calibration/aurc": 0.14129311824865207,
"eval_calibration/batch_distribution_entropy": 0.9104665702327397,
"eval_calibration/buffer_distribution_entropy": 0.9172057151238991,
"eval_calibration/confidence_entropy": 0.5052203374676412,
"eval_calibration/coverage@0%": 0.25285618279569894,
"eval_calibration/coverage@1%": 0.25285618279569894,
"eval_calibration/coverage@10%": 0.4534610215053763,
"eval_calibration/coverage@15%": 0.668010752688172,
"eval_calibration/coverage@20%": 0.777385752688172,
"eval_calibration/coverage@25%": 0.9151545698924731,
"eval_calibration/coverage@30%": 0.9623655913978495,
"eval_calibration/coverage@5%": 0.358366935483871,
"eval_calibration/ece": 0.24812658669354834,
"eval_calibration/mean_confidence": 0.5544641391129032,
"eval_completions/clipped_ratio": 0.013715277777777776,
"eval_completions/max_length": 2725.5,
"eval_completions/max_terminated_length": 2725.5,
"eval_completions/mean_length": 682.8199768066406,
"eval_completions/mean_terminated_length": 692.2769368489584,
"eval_completions/min_length": 0.0,
"eval_completions/min_terminated_length": 232.66666666666666,
"eval_loss": 0.0,
"eval_num_tokens": 300255262.0,
"eval_reward": 0.9028725524743398,
"eval_reward_std": 0.22518395135800043,
"eval_rewards/accuracy_reward": 0.6770833333333334,
"eval_rewards/brier_reward": 0.7904597421487173,
"eval_rewards/confidence_uniqueness_reward": 0.8859053353468577,
"eval_rewards/format_reward": 0.9869791666666666,
"eval_rewards/frontier_coverage_0": 0.019008085131645203,
"eval_rewards/frontier_coverage_1": 0.019008085131645203,
"eval_rewards/frontier_coverage_10": 0.019008085131645203,
"eval_rewards/frontier_coverage_15": 0.019008085131645203,
"eval_rewards/frontier_coverage_20": 0.019008085131645203,
"eval_rewards/frontier_coverage_25": 0.019008085131645203,
"eval_rewards/frontier_coverage_5": 0.019008085131645203,
"eval_rewards/frontier_entropy_batch_reward": -0.9869791666666666,
"eval_runtime": 209.653,
"eval_samples_per_second": 4.77,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4202473958333333,
"eval_signal/accuracy_reward/group_std_mean": 0.46384624640146893,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9487552146116892,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21012369791666666,
"eval_signal/advantage_abs_mean": 0.8500068088372549,
"eval_signal/advantage_pre_scale_abs_mean": 0.1913518731792768,
"eval_signal/advantage_pre_scale_std": 0.22435899823904037,
"eval_signal/advantage_std": 0.9863767127195994,
"eval_signal/brier_reward/centered_abs_mean": 0.19685744742552438,
"eval_signal/brier_reward/group_std_mean": 0.25481796513001126,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0891647810737292,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019685745239257812,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05400043291350206,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09014810870091121,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02440998361756404,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005400043601791064,
"eval_signal/format_reward/centered_abs_mean": 0.024793836132933695,
"eval_signal/format_reward/group_std_mean": 0.06416239372144143,
"eval_signal/format_reward/group_zero_std_frac": 0.6666666915019354,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.05534577121337255,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.012396918066466847,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.2995465894540151,
"eval_signal/frontier_coverage_0/group_std_mean": 0.4160442252953847,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01938629026214282,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004283516357342402,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.2995465894540151,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4160442252953847,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01938629026214282,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004283516357342402,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.2995465894540151,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4160442252953847,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01938629026214282,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004283516357342402,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.2995465894540151,
"eval_signal/frontier_coverage_15/group_std_mean": 0.4160442252953847,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01938629026214282,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004283516357342402,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.2995465894540151,
"eval_signal/frontier_coverage_20/group_std_mean": 0.4160442252953847,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01938629026214282,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004283516357342402,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2995465894540151,
"eval_signal/frontier_coverage_25/group_std_mean": 0.4160442252953847,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01938629026214282,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004283516357342402,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2995465894540151,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4160442252953847,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01938629026214282,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004283516357342402,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.024793836132933695,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.06416239372144143,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6666666915019354,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011069154599681497,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.002479383605532348,
"eval_steps_per_second": 0.029,
"step": 150
},
{
"epoch": 0.3599955000562493,
"step": 150,
"train_probe_calibration/aurc": 0.12726604129252259,
"train_probe_calibration/batch_distribution_entropy": 0.9006689267592959,
"train_probe_calibration/buffer_distribution_entropy": 0.9187270539917702,
"train_probe_calibration/confidence_entropy": 0.4242119471569765,
"train_probe_calibration/coverage@0%": 0.31468413978494625,
"train_probe_calibration/coverage@1%": 0.31468413978494625,
"train_probe_calibration/coverage@10%": 0.42422715053763443,
"train_probe_calibration/coverage@15%": 0.6587701612903226,
"train_probe_calibration/coverage@20%": 0.7639448924731184,
"train_probe_calibration/coverage@25%": 0.9104502688172044,
"train_probe_calibration/coverage@30%": 0.9524529569892474,
"train_probe_calibration/coverage@5%": 0.31468413978494625,
"train_probe_calibration/ece": 0.2248022620967742,
"train_probe_calibration/mean_confidence": 0.5427504596774193,
"train_probe_completions/clipped_ratio": 0.007812500000000019,
"train_probe_completions/max_length": 2014.8333333333333,
"train_probe_completions/max_terminated_length": 2014.8333333333333,
"train_probe_completions/mean_length": 680.6555786132812,
"train_probe_completions/mean_terminated_length": 685.9972839355469,
"train_probe_completions/min_length": 34.0,
"train_probe_completions/min_terminated_length": 193.66666666666666,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 300255262.0,
"train_probe_reward": 0.9183924396832784,
"train_probe_reward_std": 0.21424931039412817,
"train_probe_rewards/accuracy_reward": 0.7005208333333334,
"train_probe_rewards/brier_reward": 0.8044686516125997,
"train_probe_rewards/confidence_uniqueness_reward": 0.8893264333407084,
"train_probe_rewards/format_reward": 0.9921875099341074,
"train_probe_rewards/frontier_coverage_0": 0.01875637743311624,
"train_probe_rewards/frontier_coverage_1": 0.01875637743311624,
"train_probe_rewards/frontier_coverage_10": 0.01875637743311624,
"train_probe_rewards/frontier_coverage_15": 0.01875637743311624,
"train_probe_rewards/frontier_coverage_20": 0.01875637743311624,
"train_probe_rewards/frontier_coverage_25": 0.01875637743311624,
"train_probe_rewards/frontier_coverage_5": 0.01875637743311624,
"train_probe_rewards/frontier_entropy_batch_reward": -0.9921875099341074,
"train_probe_runtime": 194.1418,
"train_probe_samples_per_second": 5.151,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4080403645833333,
"train_probe_signal/accuracy_reward/group_std_mean": 0.45769575734933216,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9646410147349039,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20402018229166666,
"train_probe_signal/advantage_abs_mean": 0.845906933148702,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.181622343758742,
"train_probe_signal/advantage_pre_scale_std": 0.2128123790025711,
"train_probe_signal/advantage_std": 0.9863570928573608,
"train_probe_signal/brier_reward/centered_abs_mean": 0.19344795495271683,
"train_probe_signal/brier_reward/group_std_mean": 0.25178463260332745,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0916658565402031,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0193447961161534,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0479762547959884,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0767448153346777,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022632751303414505,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004797625510642926,
"train_probe_signal/format_reward/centered_abs_mean": 0.015136718284338713,
"train_probe_signal/format_reward/group_std_mean": 0.044194173999130726,
"train_probe_signal/format_reward/group_zero_std_frac": 0.7500000248352686,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.03515924823780855,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.007568359142169356,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.30051541328430176,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.4227322389682134,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02031859972824653,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004297370323911309,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.30051541328430176,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.4227322389682134,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02031859972824653,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004297370323911309,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.30051541328430176,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.4227322389682134,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02031859972824653,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004297370323911309,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.30051541328430176,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.4227322389682134,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02031859972824653,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004297370323911309,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.30051541328430176,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.4227322389682134,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02031859972824653,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004297370323911309,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.30051541328430176,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.4227322389682134,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02031859972824653,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004297370323911309,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.30051541328430176,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.4227322389682134,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02031859972824653,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004297370323911309,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.015136718284338713,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.044194173999130726,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.7500000248352686,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0070318499735246105,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0015136719254466395,
"train_probe_steps_per_second": 0.031
},
{
"calibration/aurc": 0.15515567262371474,
"calibration/batch_distribution_entropy": 0.9796127451140773,
"calibration/buffer_distribution_entropy": 0.9254467256231689,
"calibration/confidence_entropy": 0.4908238893003113,
"calibration/coverage@0%": 0.024257363867401783,
"calibration/coverage@1%": 0.024257363867401783,
"calibration/coverage@10%": 0.26837621050000254,
"calibration/coverage@15%": 0.6128535749082874,
"calibration/coverage@20%": 0.7565885918985946,
"calibration/coverage@25%": 0.8687765224579774,
"calibration/coverage@30%": 0.9395811518324606,
"calibration/coverage@5%": 0.13990723124140708,
"calibration/ece": 0.19067582396163238,
"calibration/mean_confidence": 0.5558143400531235,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008506944444444442,
"completions/max_length": 3296.2,
"completions/max_terminated_length": 3296.2,
"completions/mean_length": 660.3401123046875,
"completions/mean_terminated_length": 666.063525390625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 182.8,
"epoch": 0.3719953500581243,
"grad_norm": 0.0022886916995048523,
"learning_rate": 3.725961538461539e-06,
"loss": -0.0212,
"num_tokens": 310970092.0,
"reward": 1.0068758606910706,
"reward_std": 0.12418241798877716,
"rewards/accuracy_reward": 0.7279513955116272,
"rewards/brier_reward": 0.8095590114593506,
"rewards/confidence_uniqueness_reward": 0.9424502491950989,
"rewards/format_reward": 0.9914930462837219,
"rewards/frontier_coverage_0": 0.0012785772909410299,
"rewards/frontier_coverage_1": 0.0012785772909410299,
"rewards/frontier_coverage_10": 0.0012785772909410299,
"rewards/frontier_coverage_15": 0.0012785772909410299,
"rewards/frontier_coverage_20": 0.0012785772909410299,
"rewards/frontier_coverage_25": 0.0019289300893433393,
"rewards/frontier_coverage_5": 0.0012785772909410299,
"rewards/frontier_entropy_batch_reward": -0.2818462073802948,
"signal/accuracy_reward/centered_abs_mean": 0.16677517294883729,
"signal/accuracy_reward/group_std_mean": 0.21808722317218782,
"signal/accuracy_reward/group_zero_std_frac": 0.38611111640930174,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.116166591644287,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08338758647441864,
"signal/advantage_abs_mean": 0.7497113704681396,
"signal/advantage_pre_scale_abs_mean": 0.09206513911485673,
"signal/advantage_pre_scale_std": 0.14799903333187103,
"signal/advantage_std": 0.9831433296203613,
"signal/brier_reward/centered_abs_mean": 0.13939605057239532,
"signal/brier_reward/group_std_mean": 0.17993341088294984,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18695828914642335,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01393960528075695,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027320655062794685,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04605055823922157,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.036707811057567596,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027320656459778546,
"signal/format_reward/centered_abs_mean": 0.015180121548473835,
"signal/format_reward/group_std_mean": 0.0315033558756113,
"signal/format_reward/group_zero_std_frac": 0.8611111044883728,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10213274359703065,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007590060774236918,
"signal/frontier_coverage_0/centered_abs_mean": 0.1950198382139206,
"signal/frontier_coverage_0/group_std_mean": 0.255666583776474,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03742770254611969,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027887837029993536,
"signal/frontier_coverage_1/centered_abs_mean": 0.1950198382139206,
"signal/frontier_coverage_1/group_std_mean": 0.255666583776474,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03742770254611969,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027887837029993536,
"signal/frontier_coverage_10/centered_abs_mean": 0.1950198382139206,
"signal/frontier_coverage_10/group_std_mean": 0.255666583776474,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03742770254611969,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027887837029993536,
"signal/frontier_coverage_15/centered_abs_mean": 0.1950198382139206,
"signal/frontier_coverage_15/group_std_mean": 0.255666583776474,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03742770254611969,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027887837029993536,
"signal/frontier_coverage_20/centered_abs_mean": 0.1950198382139206,
"signal/frontier_coverage_20/group_std_mean": 0.255666583776474,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03742770254611969,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027887837029993536,
"signal/frontier_coverage_25/centered_abs_mean": 0.19435930848121644,
"signal/frontier_coverage_25/group_std_mean": 0.2547897040843964,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.037307870388031,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027793380431830883,
"signal/frontier_coverage_5/centered_abs_mean": 0.1950198382139206,
"signal/frontier_coverage_5/group_std_mean": 0.255666583776474,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03742770254611969,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027887837029993536,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.321687251329422,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3919772803783417,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4316555678844452,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0321687251329422,
"step": 155
},
{
"calibration/aurc": 0.13322411582750976,
"calibration/batch_distribution_entropy": 0.9164133932874314,
"calibration/buffer_distribution_entropy": 0.9378270413347043,
"calibration/confidence_entropy": 0.49663806864050064,
"calibration/coverage@0%": 0.10162749168933057,
"calibration/coverage@1%": 0.16073039406400075,
"calibration/coverage@10%": 0.6032914185290437,
"calibration/coverage@15%": 0.6842972947671463,
"calibration/coverage@20%": 0.8049335629921259,
"calibration/coverage@25%": 0.847260498687664,
"calibration/coverage@30%": 0.8797900262467191,
"calibration/coverage@5%": 0.3701992662338055,
"calibration/ece": 0.13629638618002707,
"calibration/mean_confidence": 0.6444682599329354,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011979166666666674,
"completions/max_length": 3493.4,
"completions/max_terminated_length": 3493.4,
"completions/mean_length": 676.3605834960938,
"completions/mean_terminated_length": 684.583251953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 199.0,
"epoch": 0.38399520005999926,
"grad_norm": 0.0023072566837072372,
"learning_rate": 3.846153846153847e-06,
"loss": -0.0317,
"num_tokens": 321849062.0,
"reward": 0.9769848823547364,
"reward_std": 0.12771027386188508,
"rewards/accuracy_reward": 0.677343738079071,
"rewards/brier_reward": 0.8088360667228699,
"rewards/confidence_uniqueness_reward": 0.937406325340271,
"rewards/format_reward": 0.9876736044883728,
"rewards/frontier_coverage_0": 0.024465531995519995,
"rewards/frontier_coverage_1": 0.024465531995519995,
"rewards/frontier_coverage_10": 0.024465531995519995,
"rewards/frontier_coverage_15": 0.024465531995519995,
"rewards/frontier_coverage_20": 0.024465531995519995,
"rewards/frontier_coverage_25": 0.026557547226548194,
"rewards/frontier_coverage_5": 0.024465531995519995,
"rewards/frontier_entropy_batch_reward": -0.326269394159317,
"signal/accuracy_reward/centered_abs_mean": 0.15506184995174407,
"signal/accuracy_reward/group_std_mean": 0.20306468904018402,
"signal/accuracy_reward/group_zero_std_frac": 0.425,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0282334446907044,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07753092497587204,
"signal/advantage_abs_mean": 0.7636637091636658,
"signal/advantage_pre_scale_abs_mean": 0.09637981355190277,
"signal/advantage_pre_scale_std": 0.15364661812782288,
"signal/advantage_std": 0.9831578254699707,
"signal/brier_reward/centered_abs_mean": 0.1285212144255638,
"signal/brier_reward/group_std_mean": 0.16701798737049103,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17068175673484803,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012852122262120246,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03288968279957771,
"signal/confidence_uniqueness_reward/group_std_mean": 0.053009679168462755,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04366851449012756,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032889683730900287,
"signal/format_reward/centered_abs_mean": 0.02045355923473835,
"signal/format_reward/group_std_mean": 0.03811209574341774,
"signal/format_reward/group_zero_std_frac": 0.8416666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.13571836948394775,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010226779617369175,
"signal/frontier_coverage_0/centered_abs_mean": 0.16244593560695647,
"signal/frontier_coverage_0/group_std_mean": 0.21394776403903962,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030822818726301195,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023229768965393307,
"signal/frontier_coverage_1/centered_abs_mean": 0.16244593560695647,
"signal/frontier_coverage_1/group_std_mean": 0.21394776403903962,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030822818726301195,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023229768965393307,
"signal/frontier_coverage_10/centered_abs_mean": 0.16244593560695647,
"signal/frontier_coverage_10/group_std_mean": 0.21394776403903962,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.030822818726301195,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023229768965393307,
"signal/frontier_coverage_15/centered_abs_mean": 0.16244593560695647,
"signal/frontier_coverage_15/group_std_mean": 0.21394776403903962,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030822818726301195,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023229768965393307,
"signal/frontier_coverage_20/centered_abs_mean": 0.16244593560695647,
"signal/frontier_coverage_20/group_std_mean": 0.21394776403903962,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030822818726301195,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023229768965393307,
"signal/frontier_coverage_25/centered_abs_mean": 0.1387791097164154,
"signal/frontier_coverage_25/group_std_mean": 0.18441854119300843,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.026340827718377112,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001984541164711118,
"signal/frontier_coverage_5/centered_abs_mean": 0.16244593560695647,
"signal/frontier_coverage_5/group_std_mean": 0.21394776403903962,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030822818726301195,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023229768965393307,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3421880781650543,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40926730036735537,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4542033314704895,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03421880826354027,
"step": 160
},
{
"calibration/aurc": 0.11436634074025849,
"calibration/batch_distribution_entropy": 0.9617849759011884,
"calibration/buffer_distribution_entropy": 0.9484377352757761,
"calibration/confidence_entropy": 0.4930776484401013,
"calibration/coverage@0%": 0.10334389333167812,
"calibration/coverage@1%": 0.11740639333167813,
"calibration/coverage@10%": 0.5556207287320357,
"calibration/coverage@15%": 0.7181706536448791,
"calibration/coverage@20%": 0.785286071794031,
"calibration/coverage@25%": 0.8616119070998108,
"calibration/coverage@30%": 0.9275821322181891,
"calibration/coverage@5%": 0.4210177667394427,
"calibration/ece": 0.17937487551933348,
"calibration/mean_confidence": 0.5590953376917799,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015451388888888907,
"completions/max_length": 3181.6,
"completions/max_terminated_length": 3181.6,
"completions/mean_length": 711.1984497070313,
"completions/mean_terminated_length": 722.4953369140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 204.8,
"epoch": 0.39599505006187424,
"grad_norm": 0.0024608452804386616,
"learning_rate": 3.966346153846154e-06,
"loss": -0.035,
"num_tokens": 333181140.0,
"reward": 0.9782179236412049,
"reward_std": 0.12205760926008224,
"rewards/accuracy_reward": 0.6736111044883728,
"rewards/brier_reward": 0.8028805494308472,
"rewards/confidence_uniqueness_reward": 0.9423322916030884,
"rewards/format_reward": 0.9844618082046509,
"rewards/frontier_coverage_0": 0.028045324282720684,
"rewards/frontier_coverage_1": 0.028045324282720684,
"rewards/frontier_coverage_10": 0.028045324282720684,
"rewards/frontier_coverage_15": 0.028045324282720684,
"rewards/frontier_coverage_20": 0.03051578577142209,
"rewards/frontier_coverage_25": 0.03762260042130947,
"rewards/frontier_coverage_5": 0.028045324282720684,
"rewards/frontier_entropy_batch_reward": -0.2831945657730103,
"signal/accuracy_reward/centered_abs_mean": 0.13511284589767455,
"signal/accuracy_reward/group_std_mean": 0.18115203380584716,
"signal/accuracy_reward/group_zero_std_frac": 0.4638888895511627,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9182628631591797,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06755642294883728,
"signal/advantage_abs_mean": 0.7523734450340271,
"signal/advantage_pre_scale_abs_mean": 0.09113254398107529,
"signal/advantage_pre_scale_std": 0.14792825877666474,
"signal/advantage_std": 0.9831262826919556,
"signal/brier_reward/centered_abs_mean": 0.13245978951454163,
"signal/brier_reward/group_std_mean": 0.170314958691597,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18007222414016724,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013245978951454162,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03488457053899765,
"signal/confidence_uniqueness_reward/group_std_mean": 0.054613684117794034,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04710889980196953,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034884572960436344,
"signal/format_reward/centered_abs_mean": 0.024462890625,
"signal/format_reward/group_std_mean": 0.04212077111005783,
"signal/format_reward/group_zero_std_frac": 0.8388889074325562,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.16409094184637069,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0122314453125,
"signal/frontier_coverage_0/centered_abs_mean": 0.17627032101154327,
"signal/frontier_coverage_0/group_std_mean": 0.22849083840847015,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.034184883907437326,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025206655263900757,
"signal/frontier_coverage_1/centered_abs_mean": 0.17627032101154327,
"signal/frontier_coverage_1/group_std_mean": 0.22849083840847015,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.034184883907437326,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025206655263900757,
"signal/frontier_coverage_10/centered_abs_mean": 0.17627032101154327,
"signal/frontier_coverage_10/group_std_mean": 0.22849083840847015,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.034184883907437326,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025206655263900757,
"signal/frontier_coverage_15/centered_abs_mean": 0.17627032101154327,
"signal/frontier_coverage_15/group_std_mean": 0.22849083840847015,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.034184883907437326,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025206655263900757,
"signal/frontier_coverage_20/centered_abs_mean": 0.16227281987667083,
"signal/frontier_coverage_20/group_std_mean": 0.21120948791503907,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.031467581540346144,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002320501208305359,
"signal/frontier_coverage_25/centered_abs_mean": 0.09800123274326325,
"signal/frontier_coverage_25/group_std_mean": 0.1284342259168625,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018975771404802798,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014014176325872541,
"signal/frontier_coverage_5/centered_abs_mean": 0.17627032101154327,
"signal/frontier_coverage_5/group_std_mean": 0.22849083840847015,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.034184883907437326,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025206655263900757,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3317843735218048,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40056418180465697,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4519204914569855,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03317843824625015,
"step": 165
},
{
"calibration/aurc": 0.12897152930722725,
"calibration/batch_distribution_entropy": 0.9336085837086161,
"calibration/buffer_distribution_entropy": 0.9569373573788684,
"calibration/confidence_entropy": 0.47598063140135116,
"calibration/coverage@0%": 0.0282175713914672,
"calibration/coverage@1%": 0.0282175713914672,
"calibration/coverage@10%": 0.45547725333516764,
"calibration/coverage@15%": 0.6882758272706155,
"calibration/coverage@20%": 0.8536569754028251,
"calibration/coverage@25%": 0.9135881357982166,
"calibration/coverage@30%": 0.9688458907456269,
"calibration/coverage@5%": 0.19335502303391522,
"calibration/ece": 0.11628936449888494,
"calibration/mean_confidence": 0.629978176081961,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006857638888888884,
"completions/max_length": 3500.4,
"completions/max_terminated_length": 3500.4,
"completions/mean_length": 651.5045166015625,
"completions/mean_terminated_length": 656.08759765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 160.0,
"epoch": 0.4079949000637492,
"grad_norm": 0.0028907046653330326,
"learning_rate": 4.086538461538462e-06,
"loss": -0.0206,
"num_tokens": 343775656.0,
"reward": 1.0022429823875427,
"reward_std": 0.1172142818570137,
"rewards/accuracy_reward": 0.7177951335906982,
"rewards/brier_reward": 0.8290866851806641,
"rewards/confidence_uniqueness_reward": 0.9474273324012756,
"rewards/format_reward": 0.9930555582046509,
"rewards/frontier_coverage_0": 0.02630518595688045,
"rewards/frontier_coverage_1": 0.02630518595688045,
"rewards/frontier_coverage_10": 0.02630518595688045,
"rewards/frontier_coverage_15": 0.02630518595688045,
"rewards/frontier_coverage_20": 0.03013472445309162,
"rewards/frontier_coverage_25": 0.05596128031611443,
"rewards/frontier_coverage_5": 0.02630518595688045,
"rewards/frontier_entropy_batch_reward": -0.33945736289024353,
"signal/accuracy_reward/centered_abs_mean": 0.14037001132965088,
"signal/accuracy_reward/group_std_mean": 0.18953485488891603,
"signal/accuracy_reward/group_zero_std_frac": 0.4444444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9717832684516907,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07018500566482544,
"signal/advantage_abs_mean": 0.7606682658195496,
"signal/advantage_pre_scale_abs_mean": 0.08667757511138915,
"signal/advantage_pre_scale_std": 0.13898763060569763,
"signal/advantage_std": 0.9830931544303894,
"signal/brier_reward/centered_abs_mean": 0.12069161683320999,
"signal/brier_reward/group_std_mean": 0.15923964679241182,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16777142584323884,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012069161795079709,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025993842631578445,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04322640188038349,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.035857266560196874,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002599384170025587,
"signal/format_reward/centered_abs_mean": 0.012814670242369175,
"signal/format_reward/group_std_mean": 0.027250981703400613,
"signal/format_reward/group_zero_std_frac": 0.8805555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08731953650712967,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006407335121184588,
"signal/frontier_coverage_0/centered_abs_mean": 0.16332647502422332,
"signal/frontier_coverage_0/group_std_mean": 0.21512860357761382,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03251932114362717,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002335568470880389,
"signal/frontier_coverage_1/centered_abs_mean": 0.16332647502422332,
"signal/frontier_coverage_1/group_std_mean": 0.21512860357761382,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03251932114362717,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002335568470880389,
"signal/frontier_coverage_10/centered_abs_mean": 0.16332647502422332,
"signal/frontier_coverage_10/group_std_mean": 0.21512860357761382,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03251932114362717,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002335568470880389,
"signal/frontier_coverage_15/centered_abs_mean": 0.16332647502422332,
"signal/frontier_coverage_15/group_std_mean": 0.21512860357761382,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03251932114362717,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002335568470880389,
"signal/frontier_coverage_20/centered_abs_mean": 0.1249047115445137,
"signal/frontier_coverage_20/group_std_mean": 0.1664392739534378,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024858567118644714,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001786137348972261,
"signal/frontier_coverage_25/centered_abs_mean": 0.06625153496861458,
"signal/frontier_coverage_25/group_std_mean": 0.08540613204240799,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01316972803324461,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009473969344981015,
"signal/frontier_coverage_5/centered_abs_mean": 0.16332647502422332,
"signal/frontier_coverage_5/group_std_mean": 0.21512860357761382,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03251932114362717,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002335568470880389,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3586002290248871,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4223356068134308,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.49831503033638,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03586002364754677,
"step": 170
},
{
"calibration/aurc": 0.10554713818197388,
"calibration/batch_distribution_entropy": 0.9680069135619707,
"calibration/buffer_distribution_entropy": 0.9637635731465585,
"calibration/confidence_entropy": 0.48449433471909914,
"calibration/coverage@0%": 0.06158700047098672,
"calibration/coverage@1%": 0.11685015836572354,
"calibration/coverage@10%": 0.5591526611609484,
"calibration/coverage@15%": 0.7391989335478721,
"calibration/coverage@20%": 0.8493839489032611,
"calibration/coverage@25%": 0.9431748076624615,
"calibration/coverage@30%": 0.9935828877005347,
"calibration/coverage@5%": 0.34651033746497045,
"calibration/ece": 0.19750187584352247,
"calibration/mean_confidence": 0.5472111987602275,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01710069444444442,
"completions/max_length": 3739.6,
"completions/max_terminated_length": 3739.6,
"completions/mean_length": 642.9829956054688,
"completions/mean_terminated_length": 654.1280395507813,
"completions/min_length": 0.0,
"completions/min_terminated_length": 161.8,
"epoch": 0.4199947500656242,
"grad_norm": 0.0027688194531947374,
"learning_rate": 4.20673076923077e-06,
"loss": -0.0329,
"num_tokens": 354290788.0,
"reward": 0.9870093107223511,
"reward_std": 0.1387052059173584,
"rewards/accuracy_reward": 0.694444453716278,
"rewards/brier_reward": 0.8080734610557556,
"rewards/confidence_uniqueness_reward": 0.9381106615066528,
"rewards/format_reward": 0.9815104126930236,
"rewards/frontier_coverage_0": 0.03009704500436783,
"rewards/frontier_coverage_1": 0.03009704500436783,
"rewards/frontier_coverage_10": 0.03009704500436783,
"rewards/frontier_coverage_15": 0.029873811826109885,
"rewards/frontier_coverage_20": 0.03443767204880714,
"rewards/frontier_coverage_25": 0.07700650915503501,
"rewards/frontier_coverage_5": 0.03009704500436783,
"rewards/frontier_entropy_batch_reward": -0.2932896614074707,
"signal/accuracy_reward/centered_abs_mean": 0.1675889790058136,
"signal/accuracy_reward/group_std_mean": 0.22164686918258666,
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.03622624874115,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0837944895029068,
"signal/advantage_abs_mean": 0.7442346930503845,
"signal/advantage_pre_scale_abs_mean": 0.1012690544128418,
"signal/advantage_pre_scale_std": 0.16532301008701325,
"signal/advantage_std": 0.9832346558570861,
"signal/brier_reward/centered_abs_mean": 0.1395539104938507,
"signal/brier_reward/group_std_mean": 0.17974555790424346,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17347123324871064,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013955391198396682,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04072441086173058,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06787059977650642,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.050070621073246,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0040724413003772495,
"signal/format_reward/centered_abs_mean": 0.03021375834941864,
"signal/format_reward/group_std_mean": 0.055511254072189334,
"signal/format_reward/group_zero_std_frac": 0.7750000119209289,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.18372004330158234,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01510687917470932,
"signal/frontier_coverage_0/centered_abs_mean": 0.2040518641471863,
"signal/frontier_coverage_0/group_std_mean": 0.26374197006225586,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03635032847523689,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029179416596889496,
"signal/frontier_coverage_1/centered_abs_mean": 0.2040518641471863,
"signal/frontier_coverage_1/group_std_mean": 0.26374197006225586,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03635032847523689,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029179416596889496,
"signal/frontier_coverage_10/centered_abs_mean": 0.2040518641471863,
"signal/frontier_coverage_10/group_std_mean": 0.26374197006225586,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03635032847523689,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029179416596889496,
"signal/frontier_coverage_15/centered_abs_mean": 0.2027619570493698,
"signal/frontier_coverage_15/group_std_mean": 0.2620996594429016,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.036139412224292754,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028994960244745015,
"signal/frontier_coverage_20/centered_abs_mean": 0.11568820774555207,
"signal/frontier_coverage_20/group_std_mean": 0.15245377123355866,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020674470439553262,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016543413512408733,
"signal/frontier_coverage_25/centered_abs_mean": 0.07027304172515869,
"signal/frontier_coverage_25/group_std_mean": 0.0893691822886467,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01256355717778206,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001004904485307634,
"signal/frontier_coverage_5/centered_abs_mean": 0.2040518641471863,
"signal/frontier_coverage_5/group_std_mean": 0.26374197006225586,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03635032847523689,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029179416596889496,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3387927234172821,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4076977729797363,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4237474262714386,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03387927338480949,
"step": 175
},
{
"calibration/aurc": 0.10984275074604351,
"calibration/batch_distribution_entropy": 0.9317807197735644,
"calibration/buffer_distribution_entropy": 0.9691957594816449,
"calibration/confidence_entropy": 0.5229593459895739,
"calibration/coverage@0%": 0.09351867236232028,
"calibration/coverage@1%": 0.10664203194237279,
"calibration/coverage@10%": 0.6353146219608317,
"calibration/coverage@15%": 0.7640279202423726,
"calibration/coverage@20%": 0.857970503140216,
"calibration/coverage@25%": 0.9141302660362711,
"calibration/coverage@30%": 0.9506386281843202,
"calibration/coverage@5%": 0.3119994714169439,
"calibration/ece": 0.15585108375284248,
"calibration/mean_confidence": 0.6108854745161485,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.020833333333333325,
"completions/max_length": 3743.2,
"completions/max_terminated_length": 3743.2,
"completions/mean_length": 588.3677001953125,
"completions/mean_terminated_length": 600.9534301757812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 177.8,
"epoch": 0.4319946000674992,
"grad_norm": 0.0034114145673811436,
"learning_rate": 4.326923076923077e-06,
"loss": -0.0492,
"num_tokens": 364168752.0,
"reward": 0.9856754302978515,
"reward_std": 0.1391789510846138,
"rewards/accuracy_reward": 0.7072048664093018,
"rewards/brier_reward": 0.8044279932975769,
"rewards/confidence_uniqueness_reward": 0.9358467817306518,
"rewards/format_reward": 0.9791666626930237,
"rewards/frontier_coverage_0": 0.003940967842936516,
"rewards/frontier_coverage_1": 0.003940967842936516,
"rewards/frontier_coverage_10": 0.003940967842936516,
"rewards/frontier_coverage_15": 0.005169083643704653,
"rewards/frontier_coverage_20": 0.02603294886648655,
"rewards/frontier_coverage_25": 0.08643961250782013,
"rewards/frontier_coverage_5": 0.003940967842936516,
"rewards/frontier_entropy_batch_reward": -0.3344554424285889,
"signal/accuracy_reward/centered_abs_mean": 0.14794379472732544,
"signal/accuracy_reward/group_std_mean": 0.1974938452243805,
"signal/accuracy_reward/group_zero_std_frac": 0.4333333373069763,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9245458722114563,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07397189736366272,
"signal/advantage_abs_mean": 0.7475542187690735,
"signal/advantage_pre_scale_abs_mean": 0.10126374959945679,
"signal/advantage_pre_scale_std": 0.16907794177532195,
"signal/advantage_std": 0.9832310676574707,
"signal/brier_reward/centered_abs_mean": 0.12277870327234268,
"signal/brier_reward/group_std_mean": 0.16272760629653932,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15394430458545685,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012277870066463947,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04358867034316063,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07177197933197021,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05445105582475662,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004358867136761546,
"signal/format_reward/centered_abs_mean": 0.03280164897441864,
"signal/format_reward/group_std_mean": 0.05909553095698357,
"signal/format_reward/group_zero_std_frac": 0.7666666746139527,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2044661432504654,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01640082448720932,
"signal/frontier_coverage_0/centered_abs_mean": 0.15614522099494935,
"signal/frontier_coverage_0/group_std_mean": 0.20503021478652955,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.027985046431422234,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002232876745983958,
"signal/frontier_coverage_1/centered_abs_mean": 0.15614522099494935,
"signal/frontier_coverage_1/group_std_mean": 0.20503021478652955,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.027985046431422234,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002232876745983958,
"signal/frontier_coverage_10/centered_abs_mean": 0.15614522099494935,
"signal/frontier_coverage_10/group_std_mean": 0.20503021478652955,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.027985046431422234,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002232876745983958,
"signal/frontier_coverage_15/centered_abs_mean": 0.14456919133663176,
"signal/frontier_coverage_15/group_std_mean": 0.19029048085212708,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02592291831970215,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020673394668847323,
"signal/frontier_coverage_20/centered_abs_mean": 0.06173940449953079,
"signal/frontier_coverage_20/group_std_mean": 0.08230031579732895,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01108500100672245,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008828734629787504,
"signal/frontier_coverage_25/centered_abs_mean": 0.07278510332107543,
"signal/frontier_coverage_25/group_std_mean": 0.09394310265779496,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.013049699179828168,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010408269474282861,
"signal/frontier_coverage_5/centered_abs_mean": 0.15614522099494935,
"signal/frontier_coverage_5/group_std_mean": 0.20503021478652955,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.027985046431422234,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002232876745983958,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34572470784187315,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41141175031661986,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4329352915287018,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03457247242331505,
"step": 180
},
{
"calibration/aurc": 0.15111109398849837,
"calibration/batch_distribution_entropy": 0.9566626630611218,
"calibration/buffer_distribution_entropy": 0.9735827817632021,
"calibration/confidence_entropy": 0.48504004275740165,
"calibration/coverage@0%": 0.04806912259147791,
"calibration/coverage@1%": 0.04806912259147791,
"calibration/coverage@10%": 0.3392314701725535,
"calibration/coverage@15%": 0.5697117101815119,
"calibration/coverage@20%": 0.8393673770355757,
"calibration/coverage@25%": 0.9303207856198131,
"calibration/coverage@30%": 0.9753280839895012,
"calibration/coverage@5%": 0.0841373631284347,
"calibration/ece": 0.18843498871215267,
"calibration/mean_confidence": 0.5700071762667023,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006944444444444464,
"completions/max_length": 3202.2,
"completions/max_terminated_length": 3202.2,
"completions/mean_length": 623.2223266601562,
"completions/mean_terminated_length": 627.6921752929687,
"completions/min_length": 0.0,
"completions/min_terminated_length": 171.8,
"epoch": 0.44399445006937416,
"grad_norm": 0.003752121701836586,
"learning_rate": 4.447115384615385e-06,
"loss": -0.0148,
"num_tokens": 374438321.0,
"reward": 0.9892779350280761,
"reward_std": 0.12258573472499848,
"rewards/accuracy_reward": 0.6779513835906983,
"rewards/brier_reward": 0.8098243832588196,
"rewards/confidence_uniqueness_reward": 0.9467748641967774,
"rewards/format_reward": 0.9930555582046509,
"rewards/frontier_coverage_0": 0.03621828258037567,
"rewards/frontier_coverage_1": 0.03621828258037567,
"rewards/frontier_coverage_10": 0.03621828258037567,
"rewards/frontier_coverage_15": 0.03810288608074188,
"rewards/frontier_coverage_20": 0.0494185097515583,
"rewards/frontier_coverage_25": 0.10902320891618729,
"rewards/frontier_coverage_5": 0.03621828258037567,
"rewards/frontier_entropy_batch_reward": -0.26767775118350984,
"signal/accuracy_reward/centered_abs_mean": 0.15885416567325591,
"signal/accuracy_reward/group_std_mean": 0.21068883836269378,
"signal/accuracy_reward/group_zero_std_frac": 0.4,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.08146892786026,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07942708283662796,
"signal/advantage_abs_mean": 0.753273355960846,
"signal/advantage_pre_scale_abs_mean": 0.0918624609708786,
"signal/advantage_pre_scale_std": 0.14460960030555725,
"signal/advantage_std": 0.9831261992454529,
"signal/brier_reward/centered_abs_mean": 0.12728632986545563,
"signal/brier_reward/group_std_mean": 0.1645742654800415,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17356542944908143,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012728632800281048,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024496791139245033,
"signal/confidence_uniqueness_reward/group_std_mean": 0.039763347059488294,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.033292872831225395,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002449679165147245,
"signal/format_reward/centered_abs_mean": 0.012217881716787815,
"signal/format_reward/group_std_mean": 0.024743243120610714,
"signal/format_reward/group_zero_std_frac": 0.8916666865348816,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08302046582102776,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006108940858393907,
"signal/frontier_coverage_0/centered_abs_mean": 0.18843339681625365,
"signal/frontier_coverage_0/group_std_mean": 0.24661438167095184,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03683679588139057,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026945976074784993,
"signal/frontier_coverage_1/centered_abs_mean": 0.18843339681625365,
"signal/frontier_coverage_1/group_std_mean": 0.24661438167095184,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03683679588139057,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026945976074784993,
"signal/frontier_coverage_10/centered_abs_mean": 0.18843339681625365,
"signal/frontier_coverage_10/group_std_mean": 0.24661438167095184,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03683679588139057,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026945976074784993,
"signal/frontier_coverage_15/centered_abs_mean": 0.1579059839248657,
"signal/frontier_coverage_15/group_std_mean": 0.20779311954975127,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030845557898283006,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002258055657148361,
"signal/frontier_coverage_20/centered_abs_mean": 0.06493410244584083,
"signal/frontier_coverage_20/group_std_mean": 0.08309292197227477,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012669848836958408,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009285576525144279,
"signal/frontier_coverage_25/centered_abs_mean": 0.07968129962682724,
"signal/frontier_coverage_25/group_std_mean": 0.10209451913833618,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015482756868004799,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011394425993785263,
"signal/frontier_coverage_5/centered_abs_mean": 0.18843339681625365,
"signal/frontier_coverage_5/group_std_mean": 0.24661438167095184,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03683679588139057,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026945976074784993,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32011584639549256,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3904758870601654,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4340623140335083,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03201158419251442,
"step": 185
},
{
"calibration/aurc": 0.14893818169032144,
"calibration/batch_distribution_entropy": 0.9419705194159368,
"calibration/buffer_distribution_entropy": 0.9776862355785289,
"calibration/confidence_entropy": 0.46790947060759625,
"calibration/coverage@0%": 0.028810999020120808,
"calibration/coverage@1%": 0.08416347943787539,
"calibration/coverage@10%": 0.36775327198894214,
"calibration/coverage@15%": 0.48721496613020426,
"calibration/coverage@20%": 0.6907456911063473,
"calibration/coverage@25%": 0.9141684616811642,
"calibration/coverage@30%": 0.9921424743373046,
"calibration/coverage@5%": 0.2703035088939899,
"calibration/ece": 0.169163026326467,
"calibration/mean_confidence": 0.6032991320164142,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004166666666666652,
"completions/max_length": 3103.0,
"completions/max_terminated_length": 3103.0,
"completions/mean_length": 674.9318725585938,
"completions/mean_terminated_length": 677.7634643554687,
"completions/min_length": 0.0,
"completions/min_terminated_length": 210.6,
"epoch": 0.45599430007124914,
"grad_norm": 0.004520408343523741,
"learning_rate": 4.567307692307692e-06,
"loss": 0.0002,
"num_tokens": 385296480.0,
"reward": 1.0066398620605468,
"reward_std": 0.119065323472023,
"rewards/accuracy_reward": 0.71484375,
"rewards/brier_reward": 0.8188209176063538,
"rewards/confidence_uniqueness_reward": 0.9446268916130066,
"rewards/format_reward": 0.9955729126930237,
"rewards/frontier_coverage_0": 0.02538626336026937,
"rewards/frontier_coverage_1": 0.02538626336026937,
"rewards/frontier_coverage_10": 0.02538626336026937,
"rewards/frontier_coverage_15": 0.03268268760293722,
"rewards/frontier_coverage_20": 0.06400988847017289,
"rewards/frontier_coverage_25": 0.13977613300085068,
"rewards/frontier_coverage_5": 0.02538626336026937,
"rewards/frontier_entropy_batch_reward": -0.2974688410758972,
"signal/accuracy_reward/centered_abs_mean": 0.1567545562982559,
"signal/accuracy_reward/group_std_mean": 0.2077132999897003,
"signal/accuracy_reward/group_zero_std_frac": 0.4000000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0586259484291076,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07837727814912795,
"signal/advantage_abs_mean": 0.7523825168609619,
"signal/advantage_pre_scale_abs_mean": 0.08916229903697967,
"signal/advantage_pre_scale_std": 0.1393636554479599,
"signal/advantage_std": 0.9831319808959961,
"signal/brier_reward/centered_abs_mean": 0.12696570456027984,
"signal/brier_reward/group_std_mean": 0.1662917345762253,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17168731689453126,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01269657090306282,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021785502135753632,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0352114200592041,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02974345088005066,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021785502322018147,
"signal/format_reward/centered_abs_mean": 0.008241102332249284,
"signal/format_reward/group_std_mean": 0.018836847506463528,
"signal/format_reward/group_zero_std_frac": 0.9111111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05659685656428337,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004120551166124642,
"signal/frontier_coverage_0/centered_abs_mean": 0.1937331348657608,
"signal/frontier_coverage_0/group_std_mean": 0.25190245509147646,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.037381567806005475,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002770383842289448,
"signal/frontier_coverage_1/centered_abs_mean": 0.1937331348657608,
"signal/frontier_coverage_1/group_std_mean": 0.25190245509147646,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.037381567806005475,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002770383842289448,
"signal/frontier_coverage_10/centered_abs_mean": 0.1937331348657608,
"signal/frontier_coverage_10/group_std_mean": 0.25190245509147646,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.037381567806005475,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002770383842289448,
"signal/frontier_coverage_15/centered_abs_mean": 0.1363177239894867,
"signal/frontier_coverage_15/group_std_mean": 0.17888247072696686,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026297363638877868,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001949343434534967,
"signal/frontier_coverage_20/centered_abs_mean": 0.06703521385788917,
"signal/frontier_coverage_20/group_std_mean": 0.08440038710832595,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.013008480705320836,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009586036554537714,
"signal/frontier_coverage_25/centered_abs_mean": 0.09278584271669388,
"signal/frontier_coverage_25/group_std_mean": 0.11911198645830154,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01804537754505873,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013268375769257545,
"signal/frontier_coverage_5/centered_abs_mean": 0.1937331348657608,
"signal/frontier_coverage_5/group_std_mean": 0.25190245509147646,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.037381567806005475,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002770383842289448,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34055811166763306,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.409438556432724,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4618579685688019,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0340558111667633,
"step": 190
},
{
"calibration/aurc": 0.207823017908103,
"calibration/batch_distribution_entropy": 0.9731538814379868,
"calibration/buffer_distribution_entropy": 0.979395058792719,
"calibration/confidence_entropy": 0.5097420144557466,
"calibration/coverage@0%": 0.011067546149970225,
"calibration/coverage@1%": 0.011067546149970225,
"calibration/coverage@10%": 0.2447367693875681,
"calibration/coverage@15%": 0.42390932553141825,
"calibration/coverage@20%": 0.5590823296623799,
"calibration/coverage@25%": 0.7296307994788032,
"calibration/coverage@30%": 0.8170445344129554,
"calibration/coverage@5%": 0.09602072282534012,
"calibration/ece": 0.14993556714871595,
"calibration/mean_confidence": 0.541932387510283,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004253472222222254,
"completions/max_length": 3484.8,
"completions/max_terminated_length": 3484.8,
"completions/mean_length": 737.6642333984375,
"completions/mean_terminated_length": 740.8277221679688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 193.2,
"epoch": 0.46799415007312406,
"grad_norm": 0.005371047183871269,
"learning_rate": 4.6875000000000004e-06,
"loss": -0.0067,
"num_tokens": 396875236.0,
"reward": 0.9803600549697876,
"reward_std": 0.12534761279821396,
"rewards/accuracy_reward": 0.6599826455116272,
"rewards/brier_reward": 0.8082576751708984,
"rewards/confidence_uniqueness_reward": 0.9455493211746215,
"rewards/format_reward": 0.9950520873069764,
"rewards/frontier_coverage_0": 0.038907221704721454,
"rewards/frontier_coverage_1": 0.038907221704721454,
"rewards/frontier_coverage_10": 0.038907221704721454,
"rewards/frontier_coverage_15": 0.04050202891230583,
"rewards/frontier_coverage_20": 0.05746806710958481,
"rewards/frontier_coverage_25": 0.11870489567518235,
"rewards/frontier_coverage_5": 0.038907221704721454,
"rewards/frontier_entropy_batch_reward": -0.27861965298652647,
"signal/accuracy_reward/centered_abs_mean": 0.16305881440639497,
"signal/accuracy_reward/group_std_mean": 0.21655304729938507,
"signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9943400859832764,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08152940720319748,
"signal/advantage_abs_mean": 0.7515913486480713,
"signal/advantage_pre_scale_abs_mean": 0.09461424648761749,
"signal/advantage_pre_scale_std": 0.14401794373989105,
"signal/advantage_std": 0.9832587242126465,
"signal/brier_reward/centered_abs_mean": 0.1281513586640358,
"signal/brier_reward/group_std_mean": 0.1668252170085907,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1567935198545456,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012815136276185513,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022215939313173293,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03495916984975338,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02720388360321522,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022215940058231353,
"signal/format_reward/centered_abs_mean": 0.008881293330341577,
"signal/format_reward/group_std_mean": 0.018633856624364852,
"signal/format_reward/group_zero_std_frac": 0.9166666746139527,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05408404804766178,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004440646665170789,
"signal/frontier_coverage_0/centered_abs_mean": 0.1843973457813263,
"signal/frontier_coverage_0/group_std_mean": 0.2422202616930008,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03211207017302513,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002636882150545716,
"signal/frontier_coverage_1/centered_abs_mean": 0.1843973457813263,
"signal/frontier_coverage_1/group_std_mean": 0.2422202616930008,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03211207017302513,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002636882150545716,
"signal/frontier_coverage_10/centered_abs_mean": 0.1843973457813263,
"signal/frontier_coverage_10/group_std_mean": 0.2422202616930008,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03211207017302513,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002636882150545716,
"signal/frontier_coverage_15/centered_abs_mean": 0.10657975375652314,
"signal/frontier_coverage_15/group_std_mean": 0.1422801896929741,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018575644865632058,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015240905107930303,
"signal/frontier_coverage_20/centered_abs_mean": 0.062387507408857346,
"signal/frontier_coverage_20/group_std_mean": 0.08020298928022385,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.010951629839837551,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008921413565985859,
"signal/frontier_coverage_25/centered_abs_mean": 0.09733576774597168,
"signal/frontier_coverage_25/group_std_mean": 0.12596500515937806,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017124542221426963,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013919014250859617,
"signal/frontier_coverage_5/centered_abs_mean": 0.1843973457813263,
"signal/frontier_coverage_5/group_std_mean": 0.2422202616930008,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03211207017302513,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002636882150545716,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3384637773036957,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40933026671409606,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.41559439301490786,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03384637758135796,
"step": 195
},
{
"calibration/aurc": 0.13457394207174084,
"calibration/batch_distribution_entropy": 0.9660387662645806,
"calibration/buffer_distribution_entropy": 0.9791850307731931,
"calibration/confidence_entropy": 0.49642053345282927,
"calibration/coverage@0%": 0.03922876835584915,
"calibration/coverage@1%": 0.03922876835584915,
"calibration/coverage@10%": 0.5373616414095859,
"calibration/coverage@15%": 0.6125576703375405,
"calibration/coverage@20%": 0.8448078692137642,
"calibration/coverage@25%": 0.9158964487671957,
"calibration/coverage@30%": 0.9644429098943311,
"calibration/coverage@5%": 0.21529847983954314,
"calibration/ece": 0.17658624157088154,
"calibration/mean_confidence": 0.5825247638644665,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00425347222222221,
"completions/max_length": 3652.0,
"completions/max_terminated_length": 3652.0,
"completions/mean_length": 811.3713623046875,
"completions/mean_terminated_length": 814.8777954101563,
"completions/min_length": 0.0,
"completions/min_terminated_length": 248.6,
"epoch": 0.47999400007499904,
"grad_norm": 0.005334607325494289,
"learning_rate": 4.807692307692308e-06,
"loss": 0.0016,
"num_tokens": 409290042.0,
"reward": 0.9891631245613098,
"reward_std": 0.12703752517700195,
"rewards/accuracy_reward": 0.6722222208976746,
"rewards/brier_reward": 0.8120310187339783,
"rewards/confidence_uniqueness_reward": 0.9465744733810425,
"rewards/format_reward": 0.9953993082046508,
"rewards/frontier_coverage_0": 0.04383779689669609,
"rewards/frontier_coverage_1": 0.04383779689669609,
"rewards/frontier_coverage_10": 0.043836929649114606,
"rewards/frontier_coverage_15": 0.04154842011630535,
"rewards/frontier_coverage_20": 0.07352328151464463,
"rewards/frontier_coverage_25": 0.144034680724144,
"rewards/frontier_coverage_5": 0.04383779689669609,
"rewards/frontier_entropy_batch_reward": -0.26720958948135376,
"signal/accuracy_reward/centered_abs_mean": 0.17158203125,
"signal/accuracy_reward/group_std_mean": 0.22611615359783171,
"signal/accuracy_reward/group_zero_std_frac": 0.35833333134651185,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0878794550895692,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.085791015625,
"signal/advantage_abs_mean": 0.7504582166671753,
"signal/advantage_pre_scale_abs_mean": 0.09581226408481598,
"signal/advantage_pre_scale_std": 0.14724062085151673,
"signal/advantage_std": 0.9832112431526184,
"signal/brier_reward/centered_abs_mean": 0.129945769906044,
"signal/brier_reward/group_std_mean": 0.1687883585691452,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16505386531352997,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012994576990604401,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020943275094032286,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03341059945523739,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026448329165577888,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002094327541999519,
"signal/format_reward/centered_abs_mean": 0.008273654524236918,
"signal/format_reward/group_std_mean": 0.017920159548521043,
"signal/format_reward/group_zero_std_frac": 0.9166666865348816,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.051681911945343016,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004136827262118459,
"signal/frontier_coverage_0/centered_abs_mean": 0.19716570079326629,
"signal/frontier_coverage_0/group_std_mean": 0.25832314491271974,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035849443450570104,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028194695245474578,
"signal/frontier_coverage_1/centered_abs_mean": 0.19716570079326629,
"signal/frontier_coverage_1/group_std_mean": 0.25832314491271974,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035849443450570104,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028194695245474578,
"signal/frontier_coverage_10/centered_abs_mean": 0.197145015001297,
"signal/frontier_coverage_10/group_std_mean": 0.25829660296440127,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.035845917835831645,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00281917373649776,
"signal/frontier_coverage_15/centered_abs_mean": 0.09588005095720291,
"signal/frontier_coverage_15/group_std_mean": 0.126824252307415,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01748826839029789,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013710847357288002,
"signal/frontier_coverage_20/centered_abs_mean": 0.06827102303504944,
"signal/frontier_coverage_20/group_std_mean": 0.08614148795604706,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012408490851521492,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009762756177224219,
"signal/frontier_coverage_25/centered_abs_mean": 0.10345425456762314,
"signal/frontier_coverage_25/group_std_mean": 0.1329213485121727,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018771519511938096,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014793958282098175,
"signal/frontier_coverage_5/centered_abs_mean": 0.19716570079326629,
"signal/frontier_coverage_5/group_std_mean": 0.25832314491271974,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035849443450570104,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028194695245474578,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3215024173259735,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3918437123298645,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.40881314873695374,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03215024285018444,
"step": 200
},
{
"epoch": 0.47999400007499904,
"eval_calibration/aurc": 0.12598091095806183,
"eval_calibration/batch_distribution_entropy": 0.9433110585373359,
"eval_calibration/buffer_distribution_entropy": 0.9790980924717448,
"eval_calibration/confidence_entropy": 0.47056662694642887,
"eval_calibration/coverage@0%": 0.2814180107526882,
"eval_calibration/coverage@1%": 0.2814180107526882,
"eval_calibration/coverage@10%": 0.5105846774193549,
"eval_calibration/coverage@15%": 0.6851478494623656,
"eval_calibration/coverage@20%": 0.8158602150537634,
"eval_calibration/coverage@25%": 0.8785282258064516,
"eval_calibration/coverage@30%": 0.9680779569892474,
"eval_calibration/coverage@5%": 0.3022513440860215,
"eval_calibration/ece": 0.19921678024193548,
"eval_calibration/mean_confidence": 0.5472353030913978,
"eval_completions/clipped_ratio": 0.004340277777777772,
"eval_completions/max_length": 2724.8333333333335,
"eval_completions/max_terminated_length": 2724.8333333333335,
"eval_completions/mean_length": 862.3639221191406,
"eval_completions/mean_terminated_length": 866.1793721516927,
"eval_completions/min_length": 175.5,
"eval_completions/min_terminated_length": 317.0,
"eval_loss": 0.0,
"eval_num_tokens": 409290042.0,
"eval_reward": 0.9056529303391775,
"eval_reward_std": 0.2260978470245997,
"eval_rewards/accuracy_reward": 0.6614583333333334,
"eval_rewards/brier_reward": 0.8041390081246694,
"eval_rewards/confidence_uniqueness_reward": 0.8975274364153544,
"eval_rewards/format_reward": 0.995659718910853,
"eval_rewards/frontier_coverage_0": 0.046538424057265125,
"eval_rewards/frontier_coverage_1": 0.046538424057265125,
"eval_rewards/frontier_coverage_10": 0.046553870352605976,
"eval_rewards/frontier_coverage_15": 0.04254821936289469,
"eval_rewards/frontier_coverage_20": 0.07779269541303317,
"eval_rewards/frontier_coverage_25": 0.14755996068318686,
"eval_rewards/frontier_coverage_5": 0.046538424057265125,
"eval_rewards/frontier_entropy_batch_reward": -0.995659718910853,
"eval_runtime": 176.4738,
"eval_samples_per_second": 5.667,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4331597238779068,
"eval_signal/accuracy_reward/group_std_mean": 0.47175751626491547,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9675879975159963,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2165798619389534,
"eval_signal/advantage_abs_mean": 0.8864832321802775,
"eval_signal/advantage_pre_scale_abs_mean": 0.20117887606223425,
"eval_signal/advantage_pre_scale_std": 0.22427177677551904,
"eval_signal/advantage_std": 0.9863830308119456,
"eval_signal/brier_reward/centered_abs_mean": 0.1935593287150065,
"eval_signal/brier_reward/group_std_mean": 0.24976551036039987,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08630472545822461,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019355932716280222,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04233323782682419,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0602133646607399,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01887299648175637,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0042333238913367195,
"eval_signal/format_reward/centered_abs_mean": 0.008409287935743729,
"eval_signal/format_reward/group_std_mean": 0.02455231888840596,
"eval_signal/format_reward/group_zero_std_frac": 0.8611111243565878,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.018258365492026012,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.004204643967871864,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.331520880262057,
"eval_signal/frontier_coverage_0/group_std_mean": 0.4429255078236262,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.021158167781929176,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004740748554468155,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.331520880262057,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4429255078236262,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.021158167781929176,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004740748554468155,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.32940207918485004,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4403219074010849,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0210230794424812,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00471044968192776,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.12829044088721275,
"eval_signal/frontier_coverage_15/group_std_mean": 0.18191451330979666,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.008187860560913881,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018345532977643113,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.10106631244222324,
"eval_signal/frontier_coverage_20/group_std_mean": 0.12813800697525343,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006464191324387987,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014452482379662495,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.19661433746417364,
"eval_signal/frontier_coverage_25/group_std_mean": 0.2403616358836492,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.012586226065953573,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028115849321087203,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.331520880262057,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4429255078236262,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.021158167781929176,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004740748554468155,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.008409287935743729,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.02455231888840596,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8611111243565878,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0036516734398901463,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0008409288129769266,
"eval_steps_per_second": 0.034,
"step": 200
},
{
"epoch": 0.47999400007499904,
"step": 200,
"train_probe_calibration/aurc": 0.13770699054811322,
"train_probe_calibration/batch_distribution_entropy": 0.905888972116789,
"train_probe_calibration/buffer_distribution_entropy": 0.9790985272707301,
"train_probe_calibration/confidence_entropy": 0.4350313484976907,
"train_probe_calibration/coverage@0%": 0.21404569892473116,
"train_probe_calibration/coverage@1%": 0.21404569892473116,
"train_probe_calibration/coverage@10%": 0.5129368279569892,
"train_probe_calibration/coverage@15%": 0.6231518817204301,
"train_probe_calibration/coverage@20%": 0.7699932795698925,
"train_probe_calibration/coverage@25%": 0.8850806451612904,
"train_probe_calibration/coverage@30%": 0.96875,
"train_probe_calibration/coverage@5%": 0.21404569892473116,
"train_probe_calibration/ece": 0.21378480174731182,
"train_probe_calibration/mean_confidence": 0.5547951041666667,
"train_probe_completions/clipped_ratio": 0.006076388888888895,
"train_probe_completions/max_length": 3044.8333333333335,
"train_probe_completions/max_terminated_length": 3044.8333333333335,
"train_probe_completions/mean_length": 896.2515767415365,
"train_probe_completions/mean_terminated_length": 901.6541951497396,
"train_probe_completions/min_length": 72.66666666666667,
"train_probe_completions/min_terminated_length": 322.5,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 409290042.0,
"train_probe_reward": 0.9188018341859182,
"train_probe_reward_std": 0.22796061635017395,
"train_probe_rewards/accuracy_reward": 0.6901041567325592,
"train_probe_rewards/brier_reward": 0.8122913241386414,
"train_probe_rewards/confidence_uniqueness_reward": 0.8891710241635641,
"train_probe_rewards/format_reward": 0.9930555621782938,
"train_probe_rewards/frontier_coverage_0": 0.03560524402807156,
"train_probe_rewards/frontier_coverage_1": 0.03560524402807156,
"train_probe_rewards/frontier_coverage_10": 0.03576213649163643,
"train_probe_rewards/frontier_coverage_15": 0.043352426340182625,
"train_probe_rewards/frontier_coverage_20": 0.08913688485821088,
"train_probe_rewards/frontier_coverage_25": 0.17117570588986078,
"train_probe_rewards/frontier_coverage_5": 0.03560524402807156,
"train_probe_rewards/frontier_entropy_batch_reward": -0.9930555621782938,
"train_probe_runtime": 200.3449,
"train_probe_samples_per_second": 4.991,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4161783804496129,
"train_probe_signal/accuracy_reward/group_std_mean": 0.46240218977133435,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9225195546944936,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20808919022480646,
"train_probe_signal/advantage_abs_mean": 0.8652854164441427,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.19722345719734827,
"train_probe_signal/advantage_pre_scale_std": 0.22617560625076294,
"train_probe_signal/advantage_std": 0.9863866766293844,
"train_probe_signal/brier_reward/centered_abs_mean": 0.19129946579535803,
"train_probe_signal/brier_reward/group_std_mean": 0.2488196368018786,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08478038261334102,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.01912994698310892,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04761647308866183,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.07206781022250652,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021060552758475144,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004761647588262956,
"train_probe_signal/format_reward/centered_abs_mean": 0.013346354011446238,
"train_probe_signal/format_reward/group_std_mean": 0.0362943010404706,
"train_probe_signal/format_reward/group_zero_std_frac": 0.8055555820465088,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.029019565011064213,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.006673177005723119,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.30989480515321094,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.42381706337134045,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.019649510582288105,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004431495947452883,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.30989480515321094,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.42381706337134045,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.019649510582288105,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004431495947452883,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.30765336255232495,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.42102983097235364,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.019507159168521564,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004399443161673844,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.11950497577587764,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.17058095087607703,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007572836941108108,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017089210644674797,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.10610838606953621,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.13226349900166193,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00672729096064965,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015173499123193324,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.20468894888957342,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.2476311499873797,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01297900810216864,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00292705197352916,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.30989480515321094,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.42381706337134045,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.019649510582288105,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004431495947452883,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.013346354011446238,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.0362943010404706,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8055555820465088,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.005803913033256928,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0013346354632327955,
"train_probe_steps_per_second": 0.03
},
{
"calibration/aurc": 0.12065447205364044,
"calibration/batch_distribution_entropy": 0.9478632617089037,
"calibration/buffer_distribution_entropy": 0.9794554977649417,
"calibration/confidence_entropy": 0.47588792603618596,
"calibration/coverage@0%": 0.038099784583908614,
"calibration/coverage@1%": 0.038099784583908614,
"calibration/coverage@10%": 0.4993634915861277,
"calibration/coverage@15%": 0.7511866752445787,
"calibration/coverage@20%": 0.8759859472612195,
"calibration/coverage@25%": 0.9167785326644158,
"calibration/coverage@30%": 0.9570680628272251,
"calibration/coverage@5%": 0.22011283998377826,
"calibration/ece": 0.12989965288014366,
"calibration/mean_confidence": 0.6225485873164307,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008159722222222231,
"completions/max_length": 3735.0,
"completions/max_terminated_length": 3735.0,
"completions/mean_length": 840.884716796875,
"completions/mean_terminated_length": 847.7636108398438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 228.8,
"epoch": 0.491993850076874,
"grad_norm": 0.005042714532464743,
"learning_rate": 4.927884615384616e-06,
"loss": -0.0202,
"num_tokens": 422042986.0,
"reward": 1.000865375995636,
"reward_std": 0.13984827995300292,
"rewards/accuracy_reward": 0.706250011920929,
"rewards/brier_reward": 0.8168227672576904,
"rewards/confidence_uniqueness_reward": 0.9406297087669373,
"rewards/format_reward": 0.9916666865348815,
"rewards/frontier_coverage_0": 0.02166607202962041,
"rewards/frontier_coverage_1": 0.02166607202962041,
"rewards/frontier_coverage_10": 0.022014103550463914,
"rewards/frontier_coverage_15": 0.040804407000541686,
"rewards/frontier_coverage_20": 0.09391007274389267,
"rewards/frontier_coverage_25": 0.1805516004562378,
"rewards/frontier_coverage_5": 0.02166607202962041,
"rewards/frontier_entropy_batch_reward": -0.2959077060222626,
"signal/accuracy_reward/centered_abs_mean": 0.18302951455116273,
"signal/accuracy_reward/group_std_mean": 0.24282491207122803,
"signal/accuracy_reward/group_zero_std_frac": 0.3083333373069763,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0774194717407226,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09151475727558137,
"signal/advantage_abs_mean": 0.7390522360801697,
"signal/advantage_pre_scale_abs_mean": 0.10357871353626251,
"signal/advantage_pre_scale_std": 0.16100256741046906,
"signal/advantage_std": 0.9833029270172119,
"signal/brier_reward/centered_abs_mean": 0.130741947889328,
"signal/brier_reward/group_std_mean": 0.17220645546913146,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15355044603347778,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013074194081127644,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02761349529027939,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04560641422867775,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.032353409379720685,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027613495476543903,
"signal/format_reward/centered_abs_mean": 0.014680989645421505,
"signal/format_reward/group_std_mean": 0.03030678890645504,
"signal/format_reward/group_zero_std_frac": 0.8638888835906983,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0853647917509079,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007340494822710752,
"signal/frontier_coverage_0/centered_abs_mean": 0.18313942551612855,
"signal/frontier_coverage_0/group_std_mean": 0.24227609634399414,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030802012979984285,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026188937481492758,
"signal/frontier_coverage_1/centered_abs_mean": 0.18313942551612855,
"signal/frontier_coverage_1/group_std_mean": 0.24227609634399414,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030802012979984285,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026188937481492758,
"signal/frontier_coverage_10/centered_abs_mean": 0.18102456629276276,
"signal/frontier_coverage_10/group_std_mean": 0.23960494697093965,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.030447249487042426,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025886511895805596,
"signal/frontier_coverage_15/centered_abs_mean": 0.07476659417152405,
"signal/frontier_coverage_15/group_std_mean": 0.0987214908003807,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012579312175512313,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010691623203456402,
"signal/frontier_coverage_20/centered_abs_mean": 0.0753616064786911,
"signal/frontier_coverage_20/group_std_mean": 0.09655838012695313,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012687078863382339,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010776709532365203,
"signal/frontier_coverage_25/centered_abs_mean": 0.11949286609888077,
"signal/frontier_coverage_25/group_std_mean": 0.15502755641937255,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.020129023864865304,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017087480053305625,
"signal/frontier_coverage_5/centered_abs_mean": 0.18313942551612855,
"signal/frontier_coverage_5/group_std_mean": 0.24227609634399414,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030802012979984285,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026188937481492758,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3225748658180237,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3920146644115448,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.38031149506568906,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03225748799741268,
"step": 205
},
{
"calibration/aurc": 0.1283698895549012,
"calibration/batch_distribution_entropy": 0.9059175239789449,
"calibration/buffer_distribution_entropy": 0.9786380154140943,
"calibration/confidence_entropy": 0.511529493161178,
"calibration/coverage@0%": 0.0743731477469488,
"calibration/coverage@1%": 0.07960874984118964,
"calibration/coverage@10%": 0.45535827048147715,
"calibration/coverage@15%": 0.6292367935026641,
"calibration/coverage@20%": 0.8990382170848402,
"calibration/coverage@25%": 0.9410870313292656,
"calibration/coverage@30%": 0.9687391209747606,
"calibration/coverage@5%": 0.20523881659123244,
"calibration/ece": 0.1291991111896027,
"calibration/mean_confidence": 0.6613698091778775,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006336805555555536,
"completions/max_length": 3717.2,
"completions/max_terminated_length": 3717.2,
"completions/mean_length": 741.9556518554688,
"completions/mean_terminated_length": 746.6578857421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 186.8,
"epoch": 0.503993700078749,
"grad_norm": 0.003985654562711716,
"learning_rate": 4.987980769230769e-06,
"loss": -0.0183,
"num_tokens": 433706155.0,
"reward": 0.9879561424255371,
"reward_std": 0.1374760627746582,
"rewards/accuracy_reward": 0.6838541626930237,
"rewards/brier_reward": 0.828122079372406,
"rewards/confidence_uniqueness_reward": 0.9403419256210327,
"rewards/format_reward": 0.9931423664093018,
"rewards/frontier_coverage_0": 0.0342383430339396,
"rewards/frontier_coverage_1": 0.0342383430339396,
"rewards/frontier_coverage_10": 0.0340392192825675,
"rewards/frontier_coverage_15": 0.03967601284384727,
"rewards/frontier_coverage_20": 0.08980410993099212,
"rewards/frontier_coverage_25": 0.1722535938024521,
"rewards/frontier_coverage_5": 0.0342383430339396,
"rewards/frontier_entropy_batch_reward": -0.3365890234708786,
"signal/accuracy_reward/centered_abs_mean": 0.17376301884651185,
"signal/accuracy_reward/group_std_mean": 0.22921195328235627,
"signal/accuracy_reward/group_zero_std_frac": 0.347222226858139,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0327161431312561,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08688150942325593,
"signal/advantage_abs_mean": 0.7446362137794494,
"signal/advantage_pre_scale_abs_mean": 0.10320448130369186,
"signal/advantage_pre_scale_std": 0.1599856436252594,
"signal/advantage_std": 0.9832797527313233,
"signal/brier_reward/centered_abs_mean": 0.12006305009126664,
"signal/brier_reward/group_std_mean": 0.15930896997451782,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.14385341256856918,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012006304785609245,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024894104152917863,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04236802905797958,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03055493049323559,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024894104804843663,
"signal/format_reward/centered_abs_mean": 0.01252712681889534,
"signal/format_reward/group_std_mean": 0.027480727061629295,
"signal/format_reward/group_zero_std_frac": 0.8722222208976745,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07819846607744693,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00626356340944767,
"signal/frontier_coverage_0/centered_abs_mean": 0.15801767706871034,
"signal/frontier_coverage_0/group_std_mean": 0.20844950377941132,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.026977039873600006,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002259652712382376,
"signal/frontier_coverage_1/centered_abs_mean": 0.15801767706871034,
"signal/frontier_coverage_1/group_std_mean": 0.20844950377941132,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.026977039873600006,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002259652712382376,
"signal/frontier_coverage_10/centered_abs_mean": 0.15202154815196992,
"signal/frontier_coverage_10/group_std_mean": 0.20094724893569946,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02595903053879738,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00217390819452703,
"signal/frontier_coverage_15/centered_abs_mean": 0.05612077414989471,
"signal/frontier_coverage_15/group_std_mean": 0.07349130362272263,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.009616562630981207,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008025271003134548,
"signal/frontier_coverage_20/centered_abs_mean": 0.07690738439559937,
"signal/frontier_coverage_20/group_std_mean": 0.09875056445598603,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.013239230774343014,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001099775591865182,
"signal/frontier_coverage_25/centered_abs_mean": 0.12822509557008743,
"signal/frontier_coverage_25/group_std_mean": 0.16526393294334413,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02204398587346077,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018336188746616243,
"signal/frontier_coverage_5/centered_abs_mean": 0.15801767706871034,
"signal/frontier_coverage_5/group_std_mean": 0.20844950377941132,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.026977039873600006,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002259652712382376,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.343439394235611,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41007362604141234,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4145658850669861,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03434394001960754,
"step": 210
},
{
"calibration/aurc": 0.14320745326360448,
"calibration/batch_distribution_entropy": 0.9150604212103024,
"calibration/buffer_distribution_entropy": 0.9777807684637884,
"calibration/confidence_entropy": 0.47859022632943315,
"calibration/coverage@0%": 0.10660243473326474,
"calibration/coverage@1%": 0.1176842289285154,
"calibration/coverage@10%": 0.24826614573394507,
"calibration/coverage@15%": 0.6604307322085081,
"calibration/coverage@20%": 0.8212603585921796,
"calibration/coverage@25%": 0.932620320855615,
"calibration/coverage@30%": 0.9577540106951872,
"calibration/coverage@5%": 0.18734122101294812,
"calibration/ece": 0.17545954965586857,
"calibration/mean_confidence": 0.6471332423964474,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014236111111111116,
"completions/max_length": 3788.4,
"completions/max_terminated_length": 3788.4,
"completions/mean_length": 807.5797729492188,
"completions/mean_terminated_length": 819.5307006835938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 148.8,
"epoch": 0.515993550080624,
"grad_norm": 0.003014960326254368,
"learning_rate": 4.957932692307692e-06,
"loss": -0.0452,
"num_tokens": 446088290.0,
"reward": 0.9924614787101745,
"reward_std": 0.13940343707799913,
"rewards/accuracy_reward": 0.70390625,
"rewards/brier_reward": 0.8037495970726013,
"rewards/confidence_uniqueness_reward": 0.9330443620681763,
"rewards/format_reward": 0.9836805462837219,
"rewards/frontier_coverage_0": 0.013330519822193309,
"rewards/frontier_coverage_1": 0.013330519822193309,
"rewards/frontier_coverage_10": 0.01646778262220323,
"rewards/frontier_coverage_15": 0.04249517768621445,
"rewards/frontier_coverage_20": 0.09611388593912125,
"rewards/frontier_coverage_25": 0.17800917625427246,
"rewards/frontier_coverage_5": 0.013330519822193309,
"rewards/frontier_entropy_batch_reward": -0.3034632682800293,
"signal/accuracy_reward/centered_abs_mean": 0.149462890625,
"signal/accuracy_reward/group_std_mean": 0.2026258021593094,
"signal/accuracy_reward/group_zero_std_frac": 0.3944444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9496671915054321,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0747314453125,
"signal/advantage_abs_mean": 0.7293966054916382,
"signal/advantage_pre_scale_abs_mean": 0.09994653314352035,
"signal/advantage_pre_scale_std": 0.1655849814414978,
"signal/advantage_std": 0.9832081437110901,
"signal/brier_reward/centered_abs_mean": 0.13938885033130646,
"signal/brier_reward/group_std_mean": 0.1821478396654129,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17650045454502106,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013938885927200318,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03878090418875217,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06749042719602585,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04819239303469658,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003878090484067798,
"signal/format_reward/centered_abs_mean": 0.02853732667863369,
"signal/format_reward/group_std_mean": 0.055648359656333926,
"signal/format_reward/group_zero_std_frac": 0.7666666746139527,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.17448110282421112,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.014268663339316845,
"signal/frontier_coverage_0/centered_abs_mean": 0.17390457689762115,
"signal/frontier_coverage_0/group_std_mean": 0.22902662456035613,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03165303654968739,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024868354201316835,
"signal/frontier_coverage_1/centered_abs_mean": 0.17390457689762115,
"signal/frontier_coverage_1/group_std_mean": 0.22902662456035613,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03165303654968739,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024868354201316835,
"signal/frontier_coverage_10/centered_abs_mean": 0.16191380023956298,
"signal/frontier_coverage_10/group_std_mean": 0.21419888734817505,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02945178672671318,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023153672926127912,
"signal/frontier_coverage_15/centered_abs_mean": 0.06422688812017441,
"signal/frontier_coverage_15/group_std_mean": 0.08302107304334641,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011686071194708348,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009184445254504681,
"signal/frontier_coverage_20/centered_abs_mean": 0.08474169373512268,
"signal/frontier_coverage_20/group_std_mean": 0.1076007753610611,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015469780191779137,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012118062004446983,
"signal/frontier_coverage_25/centered_abs_mean": 0.13222533762454985,
"signal/frontier_coverage_25/group_std_mean": 0.1688993453979492,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0241268590092659,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018908223137259483,
"signal/frontier_coverage_5/centered_abs_mean": 0.17390457689762115,
"signal/frontier_coverage_5/group_std_mean": 0.22902662456035613,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03165303654968739,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024868354201316835,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.331625634431839,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4007671117782593,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.42386093735694885,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03316256329417229,
"step": 215
},
{
"calibration/aurc": 0.20061380169228143,
"calibration/batch_distribution_entropy": 0.9799272529009608,
"calibration/buffer_distribution_entropy": 0.9768673721994799,
"calibration/confidence_entropy": 0.4889103468023185,
"calibration/coverage@0%": 0.012115919594949408,
"calibration/coverage@1%": 0.012115919594949408,
"calibration/coverage@10%": 0.24258528536071894,
"calibration/coverage@15%": 0.46005582609368306,
"calibration/coverage@20%": 0.5882172287298035,
"calibration/coverage@25%": 0.6680374267611943,
"calibration/coverage@30%": 0.8304669701684627,
"calibration/coverage@5%": 0.056363151901883214,
"calibration/ece": 0.14497071478111592,
"calibration/mean_confidence": 0.5415289580677148,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.060416666666666674,
"completions/max_length": 3898.4,
"completions/max_terminated_length": 3898.4,
"completions/mean_length": 804.4072998046875,
"completions/mean_terminated_length": 855.8221435546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 230.2,
"epoch": 0.527993400082499,
"grad_norm": 0.002567050512880087,
"learning_rate": 4.927884615384616e-06,
"loss": -0.142,
"num_tokens": 458440694.0,
"reward": 0.956389057636261,
"reward_std": 0.1825144648551941,
"rewards/accuracy_reward": 0.6917534828186035,
"rewards/brier_reward": 0.7530406594276429,
"rewards/confidence_uniqueness_reward": 0.8914730548858643,
"rewards/format_reward": 0.9363715291023255,
"rewards/frontier_coverage_0": -0.014244955778121949,
"rewards/frontier_coverage_1": -0.014244955778121949,
"rewards/frontier_coverage_10": -0.009593733958899975,
"rewards/frontier_coverage_15": 0.036306874454021455,
"rewards/frontier_coverage_20": 0.09199995398521424,
"rewards/frontier_coverage_25": 0.17109810411930085,
"rewards/frontier_coverage_5": -0.014244955778121949,
"rewards/frontier_entropy_batch_reward": -0.2565806359052658,
"signal/accuracy_reward/centered_abs_mean": 0.14844292402267456,
"signal/accuracy_reward/group_std_mean": 0.2044283628463745,
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8140737652778626,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07422146201133728,
"signal/advantage_abs_mean": 0.7192968487739563,
"signal/advantage_pre_scale_abs_mean": 0.13045098185539244,
"signal/advantage_pre_scale_std": 0.22229794263839722,
"signal/advantage_std": 0.9833795070648194,
"signal/brier_reward/centered_abs_mean": 0.1779948115348816,
"signal/brier_reward/group_std_mean": 0.22492851316928864,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1958990842103958,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.017799481749534607,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09162591844797134,
"signal/confidence_uniqueness_reward/group_std_mean": 0.13799885660409927,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.10163218230009079,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009162592049688102,
"signal/format_reward/centered_abs_mean": 0.0875271275639534,
"signal/format_reward/group_std_mean": 0.13421101570129396,
"signal/format_reward/group_zero_std_frac": 0.5388889074325561,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.48496673703193666,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0437635637819767,
"signal/frontier_coverage_0/centered_abs_mean": 0.19460634887218475,
"signal/frontier_coverage_0/group_std_mean": 0.2550558179616928,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03059198223054409,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027828707825392485,
"signal/frontier_coverage_1/centered_abs_mean": 0.19460634887218475,
"signal/frontier_coverage_1/group_std_mean": 0.2550558179616928,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03059198223054409,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027828707825392485,
"signal/frontier_coverage_10/centered_abs_mean": 0.17800663709640502,
"signal/frontier_coverage_10/group_std_mean": 0.23399430215358735,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02799622118473053,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002545494958758354,
"signal/frontier_coverage_15/centered_abs_mean": 0.06852128356695175,
"signal/frontier_coverage_15/group_std_mean": 0.08859587162733078,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.010821715742349625,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.000979854364413768,
"signal/frontier_coverage_20/centered_abs_mean": 0.08110383749008179,
"signal/frontier_coverage_20/group_std_mean": 0.10272217839956284,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012808217480778695,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011597848031669855,
"signal/frontier_coverage_25/centered_abs_mean": 0.12214765101671218,
"signal/frontier_coverage_25/group_std_mean": 0.15550835728645324,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.019256106950342654,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001746711414307356,
"signal/frontier_coverage_5/centered_abs_mean": 0.19460634887218475,
"signal/frontier_coverage_5/group_std_mean": 0.2550558179616928,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03059198223054409,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027828707825392485,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31999850273132324,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3911717176437378,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3527726888656616,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03199984952807426,
"step": 220
},
{
"calibration/aurc": 0.11836046230203076,
"calibration/batch_distribution_entropy": 0.9299152762651394,
"calibration/buffer_distribution_entropy": 0.9769273121128415,
"calibration/confidence_entropy": 0.48964527661178403,
"calibration/coverage@0%": 0.04429167266508173,
"calibration/coverage@1%": 0.04429167266508173,
"calibration/coverage@10%": 0.6058377209623409,
"calibration/coverage@15%": 0.7308888548404051,
"calibration/coverage@20%": 0.8211582774335604,
"calibration/coverage@25%": 0.8667417686517487,
"calibration/coverage@30%": 0.8987181456637139,
"calibration/coverage@5%": 0.37037933609710344,
"calibration/ece": 0.1627648079617419,
"calibration/mean_confidence": 0.6129012228374939,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05642361111111112,
"completions/max_length": 3970.6,
"completions/max_terminated_length": 3970.6,
"completions/mean_length": 854.6477661132812,
"completions/mean_terminated_length": 906.0167846679688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 242.6,
"epoch": 0.5399932500843739,
"grad_norm": 0.0025974006857722998,
"learning_rate": 4.897836538461539e-06,
"loss": -0.1382,
"num_tokens": 471414428.0,
"reward": 0.9573588132858276,
"reward_std": 0.17803834080696107,
"rewards/accuracy_reward": 0.6902777791023255,
"rewards/brier_reward": 0.7832067370414734,
"rewards/confidence_uniqueness_reward": 0.8929091334342957,
"rewards/format_reward": 0.9434895992279053,
"rewards/frontier_coverage_0": 0.009216944687068462,
"rewards/frontier_coverage_1": 0.009216944687068462,
"rewards/frontier_coverage_10": 0.012849159445613623,
"rewards/frontier_coverage_15": 0.047060129791498186,
"rewards/frontier_coverage_20": 0.11295257806777954,
"rewards/frontier_coverage_25": 0.20341356098651886,
"rewards/frontier_coverage_5": 0.009216944687068462,
"rewards/frontier_entropy_batch_reward": -0.3291258454322815,
"signal/accuracy_reward/centered_abs_mean": 0.1601019948720932,
"signal/accuracy_reward/group_std_mean": 0.20708029568195344,
"signal/accuracy_reward/group_zero_std_frac": 0.425,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9800410747528077,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0800509974360466,
"signal/advantage_abs_mean": 0.7600647449493408,
"signal/advantage_pre_scale_abs_mean": 0.1340820536017418,
"signal/advantage_pre_scale_std": 0.22110334038734436,
"signal/advantage_std": 0.9832608580589295,
"signal/brier_reward/centered_abs_mean": 0.15971020460128785,
"signal/brier_reward/group_std_mean": 0.20280967950820922,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19527204036712648,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015971020981669425,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08719486072659492,
"signal/confidence_uniqueness_reward/group_std_mean": 0.12454380840063095,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.10635674297809601,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008719485811889171,
"signal/format_reward/centered_abs_mean": 0.080126953125,
"signal/format_reward/group_std_mean": 0.116636623442173,
"signal/format_reward/group_zero_std_frac": 0.6305555701255798,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.4883050560951233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0400634765625,
"signal/frontier_coverage_0/centered_abs_mean": 0.1580364465713501,
"signal/frontier_coverage_0/group_std_mean": 0.20654098987579345,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02764430344104767,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022599212359637024,
"signal/frontier_coverage_1/centered_abs_mean": 0.1580364465713501,
"signal/frontier_coverage_1/group_std_mean": 0.20654098987579345,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02764430344104767,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022599212359637024,
"signal/frontier_coverage_10/centered_abs_mean": 0.1387157380580902,
"signal/frontier_coverage_10/group_std_mean": 0.1824465125799179,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.024255484342575073,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019836350344121455,
"signal/frontier_coverage_15/centered_abs_mean": 0.061208389699459076,
"signal/frontier_coverage_15/group_std_mean": 0.07726839333772659,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.010705550946295262,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008752800291404128,
"signal/frontier_coverage_20/centered_abs_mean": 0.09257221668958664,
"signal/frontier_coverage_20/group_std_mean": 0.11682336181402206,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016201268322765826,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013237826991826297,
"signal/frontier_coverage_25/centered_abs_mean": 0.14442039132118226,
"signal/frontier_coverage_25/group_std_mean": 0.18332480192184447,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02527971677482128,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020652116276323796,
"signal/frontier_coverage_5/centered_abs_mean": 0.1580364465713501,
"signal/frontier_coverage_5/group_std_mean": 0.20654098987579345,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02764430344104767,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022599212359637024,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3387024819850922,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40370280146598814,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4145235657691956,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03387024849653244,
"step": 225
},
{
"calibration/aurc": 0.1259500554224069,
"calibration/batch_distribution_entropy": 0.9646647743133172,
"calibration/buffer_distribution_entropy": 0.9760416707390451,
"calibration/confidence_entropy": 0.5170283098740476,
"calibration/coverage@0%": 0.1321393950216345,
"calibration/coverage@1%": 0.14262021842656228,
"calibration/coverage@10%": 0.49291161070043243,
"calibration/coverage@15%": 0.6123658945814797,
"calibration/coverage@20%": 0.8416557899316519,
"calibration/coverage@25%": 0.8898460415701794,
"calibration/coverage@30%": 0.9226909744151124,
"calibration/coverage@5%": 0.37023242162917447,
"calibration/ece": 0.17620611336596587,
"calibration/mean_confidence": 0.5703640132680619,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01987847222222221,
"completions/max_length": 3884.8,
"completions/max_terminated_length": 3884.8,
"completions/mean_length": 895.6581665039063,
"completions/mean_terminated_length": 914.0191528320313,
"completions/min_length": 0.0,
"completions/min_terminated_length": 279.4,
"epoch": 0.5519931000862489,
"grad_norm": 0.002822543727234006,
"learning_rate": 4.867788461538462e-06,
"loss": -0.0525,
"num_tokens": 484813178.0,
"reward": 0.9878373265266418,
"reward_std": 0.13694891929626465,
"rewards/accuracy_reward": 0.6951388955116272,
"rewards/brier_reward": 0.7968821167945862,
"rewards/confidence_uniqueness_reward": 0.9321338534355164,
"rewards/format_reward": 0.9801215291023254,
"rewards/frontier_coverage_0": 0.00919010564684868,
"rewards/frontier_coverage_1": 0.00919010564684868,
"rewards/frontier_coverage_10": 0.013235241547226906,
"rewards/frontier_coverage_15": 0.04320261515676975,
"rewards/frontier_coverage_20": 0.0952497273683548,
"rewards/frontier_coverage_25": 0.17420322000980376,
"rewards/frontier_coverage_5": 0.00919010564684868,
"rewards/frontier_entropy_batch_reward": -0.2774901568889618,
"signal/accuracy_reward/centered_abs_mean": 0.14523654580116271,
"signal/accuracy_reward/group_std_mean": 0.19628881812095642,
"signal/accuracy_reward/group_zero_std_frac": 0.4250000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9387128591537476,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07261827290058136,
"signal/advantage_abs_mean": 0.737873125076294,
"signal/advantage_pre_scale_abs_mean": 0.09995074719190597,
"signal/advantage_pre_scale_std": 0.16725102066993713,
"signal/advantage_std": 0.9831852674484253,
"signal/brier_reward/centered_abs_mean": 0.13690456748008728,
"signal/brier_reward/group_std_mean": 0.17525491416454314,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17712121903896333,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013690456189215184,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04152504913508892,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06767643317580223,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05302448347210884,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004152504866942764,
"signal/format_reward/centered_abs_mean": 0.03237304650247097,
"signal/format_reward/group_std_mean": 0.05700000524520874,
"signal/format_reward/group_zero_std_frac": 0.7750000119209289,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2050231069326401,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.016186523251235485,
"signal/frontier_coverage_0/centered_abs_mean": 0.18283499777317047,
"signal/frontier_coverage_0/group_std_mean": 0.23812003433704376,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03383407108485699,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026145403273403643,
"signal/frontier_coverage_1/centered_abs_mean": 0.18283499777317047,
"signal/frontier_coverage_1/group_std_mean": 0.23812003433704376,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03383407108485699,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026145403273403643,
"signal/frontier_coverage_10/centered_abs_mean": 0.1571557939052582,
"signal/frontier_coverage_10/group_std_mean": 0.20568057000637055,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02907021902501583,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022473279386758804,
"signal/frontier_coverage_15/centered_abs_mean": 0.05910146087408066,
"signal/frontier_coverage_15/group_std_mean": 0.0758358508348465,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011005043797194958,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008451508358120919,
"signal/frontier_coverage_20/centered_abs_mean": 0.07917019873857498,
"signal/frontier_coverage_20/group_std_mean": 0.10083940923213959,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.014786782115697861,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001132133835926652,
"signal/frontier_coverage_25/centered_abs_mean": 0.12114208936691284,
"signal/frontier_coverage_25/group_std_mean": 0.15490374565124512,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02259993925690651,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017323318403214215,
"signal/frontier_coverage_5/centered_abs_mean": 0.18283499777317047,
"signal/frontier_coverage_5/group_std_mean": 0.23812003433704376,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03383407108485699,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026145403273403643,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32804338335990907,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.396223646402359,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.42523607015609743,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03280433937907219,
"step": 230
},
{
"calibration/aurc": 0.17411097994690442,
"calibration/batch_distribution_entropy": 0.9419306618782685,
"calibration/buffer_distribution_entropy": 0.9762051558491983,
"calibration/confidence_entropy": 0.459608762995326,
"calibration/coverage@0%": 0.04538488280031869,
"calibration/coverage@1%": 0.04538488280031869,
"calibration/coverage@10%": 0.43346879259359766,
"calibration/coverage@15%": 0.5383327090819566,
"calibration/coverage@20%": 0.7157446177083908,
"calibration/coverage@25%": 0.7734377818017153,
"calibration/coverage@30%": 0.8238491882719682,
"calibration/coverage@5%": 0.20049017071840577,
"calibration/ece": 0.10645703859292935,
"calibration/mean_confidence": 0.580155255120576,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009201388888888884,
"completions/max_length": 3886.6,
"completions/max_terminated_length": 3886.6,
"completions/mean_length": 938.2537353515625,
"completions/mean_terminated_length": 946.9867431640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 288.8,
"epoch": 0.5639929500881239,
"grad_norm": 0.0031697500962764025,
"learning_rate": 4.837740384615385e-06,
"loss": -0.0189,
"num_tokens": 498712453.0,
"reward": 0.9915923118591309,
"reward_std": 0.12465869039297103,
"rewards/accuracy_reward": 0.6875,
"rewards/brier_reward": 0.8318912744522095,
"rewards/confidence_uniqueness_reward": 0.9377104878425598,
"rewards/format_reward": 0.9907118082046509,
"rewards/frontier_coverage_0": 0.05437804870307446,
"rewards/frontier_coverage_1": 0.05437804870307446,
"rewards/frontier_coverage_10": 0.05576707310974598,
"rewards/frontier_coverage_15": 0.06549909114837646,
"rewards/frontier_coverage_20": 0.1282554194331169,
"rewards/frontier_coverage_25": 0.2169576346874237,
"rewards/frontier_coverage_5": 0.05437805764377117,
"rewards/frontier_entropy_batch_reward": -0.33477243185043337,
"signal/accuracy_reward/centered_abs_mean": 0.14432508647441863,
"signal/accuracy_reward/group_std_mean": 0.19657641947269439,
"signal/accuracy_reward/group_zero_std_frac": 0.41666666865348817,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9984345197677612,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07216254323720932,
"signal/advantage_abs_mean": 0.7449512004852294,
"signal/advantage_pre_scale_abs_mean": 0.09156662523746491,
"signal/advantage_pre_scale_std": 0.14934734106063843,
"signal/advantage_std": 0.9831010937690735,
"signal/brier_reward/centered_abs_mean": 0.1267389699816704,
"signal/brier_reward/group_std_mean": 0.1676239401102066,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17560543715953827,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012673897296190261,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029656323418021203,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04549751281738281,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.041109825298190114,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029656322207301856,
"signal/format_reward/centered_abs_mean": 0.01534830741584301,
"signal/format_reward/group_std_mean": 0.02814323566854,
"signal/format_reward/group_zero_std_frac": 0.8861111283302308,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10632295608520508,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007674153707921505,
"signal/frontier_coverage_0/centered_abs_mean": 0.16365084946155548,
"signal/frontier_coverage_0/group_std_mean": 0.21851195394992828,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.032391490787267684,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002340207155793905,
"signal/frontier_coverage_1/centered_abs_mean": 0.16365084946155548,
"signal/frontier_coverage_1/group_std_mean": 0.21851195394992828,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.032391490787267684,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002340207155793905,
"signal/frontier_coverage_10/centered_abs_mean": 0.13863745629787444,
"signal/frontier_coverage_10/group_std_mean": 0.1869216591119766,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.027440791577100755,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019825156312435867,
"signal/frontier_coverage_15/centered_abs_mean": 0.06753401160240173,
"signal/frontier_coverage_15/group_std_mean": 0.0844781219959259,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013386444002389909,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009657363407313824,
"signal/frontier_coverage_20/centered_abs_mean": 0.09699487835168838,
"signal/frontier_coverage_20/group_std_mean": 0.12251366078853607,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019232844188809394,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001387026789598167,
"signal/frontier_coverage_25/centered_abs_mean": 0.1444738209247589,
"signal/frontier_coverage_25/group_std_mean": 0.18456913232803346,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02863982766866684,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002065975684672594,
"signal/frontier_coverage_5/centered_abs_mean": 0.16365076303482057,
"signal/frontier_coverage_5/group_std_mean": 0.2185118556022644,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032391472905874255,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002340205991640687,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35011342763900755,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41561758518218994,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.48549712300300596,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03501134365797043,
"step": 235
},
{
"calibration/aurc": 0.14805087430462194,
"calibration/batch_distribution_entropy": 0.9523826885854355,
"calibration/buffer_distribution_entropy": 0.9767688978356597,
"calibration/confidence_entropy": 0.4784020873564082,
"calibration/coverage@0%": 0.05123674052686704,
"calibration/coverage@1%": 0.07832007386020037,
"calibration/coverage@10%": 0.43130871841538615,
"calibration/coverage@15%": 0.6591521632886991,
"calibration/coverage@20%": 0.7477753048857446,
"calibration/coverage@25%": 0.8091120104366313,
"calibration/coverage@30%": 0.8810251139219194,
"calibration/coverage@5%": 0.21753699557956985,
"calibration/ece": 0.17466656035166012,
"calibration/mean_confidence": 0.5136080409437115,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004166666666666652,
"completions/max_length": 3899.2,
"completions/max_terminated_length": 3899.2,
"completions/mean_length": 1022.4588745117187,
"completions/mean_terminated_length": 1026.7896850585937,
"completions/min_length": 0.0,
"completions/min_terminated_length": 307.6,
"epoch": 0.5759928000899989,
"grad_norm": 0.0034370513167232275,
"learning_rate": 4.807692307692308e-06,
"loss": -0.0014,
"num_tokens": 513614667.0,
"reward": 1.0006279349327087,
"reward_std": 0.11839591711759567,
"rewards/accuracy_reward": 0.6960069417953492,
"rewards/brier_reward": 0.8194296598434448,
"rewards/confidence_uniqueness_reward": 0.9458336114883423,
"rewards/format_reward": 0.9958333253860474,
"rewards/frontier_coverage_0": 0.033486737415660175,
"rewards/frontier_coverage_1": 0.033486737415660175,
"rewards/frontier_coverage_10": 0.03658188153058291,
"rewards/frontier_coverage_15": 0.05663715898990631,
"rewards/frontier_coverage_20": 0.11354580670595169,
"rewards/frontier_coverage_25": 0.19528359770774842,
"rewards/frontier_coverage_5": 0.03349523107754067,
"rewards/frontier_entropy_batch_reward": -0.2900451928377151,
"signal/accuracy_reward/centered_abs_mean": 0.15009765625,
"signal/accuracy_reward/group_std_mean": 0.19873380959033965,
"signal/accuracy_reward/group_zero_std_frac": 0.43055555820465086,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0312488436698914,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.075048828125,
"signal/advantage_abs_mean": 0.7495712041854858,
"signal/advantage_pre_scale_abs_mean": 0.08905573338270187,
"signal/advantage_pre_scale_std": 0.1401791453361511,
"signal/advantage_std": 0.9831058859825135,
"signal/brier_reward/centered_abs_mean": 0.13217740058898925,
"signal/brier_reward/group_std_mean": 0.17042210102081298,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18192693293094636,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013217740133404732,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0206814207136631,
"signal/confidence_uniqueness_reward/group_std_mean": 0.032660214602947234,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02828650362789631,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020681420573964715,
"signal/format_reward/centered_abs_mean": 0.0074761285330168905,
"signal/format_reward/group_std_mean": 0.01654504146426916,
"signal/format_reward/group_zero_std_frac": 0.9222222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.050303632486611606,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0037380642665084452,
"signal/frontier_coverage_0/centered_abs_mean": 0.19069683253765107,
"signal/frontier_coverage_0/group_std_mean": 0.24695312976837158,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.037591222673654556,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027269646525382996,
"signal/frontier_coverage_1/centered_abs_mean": 0.19069683253765107,
"signal/frontier_coverage_1/group_std_mean": 0.24695312976837158,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.037591222673654556,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027269646525382996,
"signal/frontier_coverage_10/centered_abs_mean": 0.15149562060832977,
"signal/frontier_coverage_10/group_std_mean": 0.19783547520637512,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02986070066690445,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021663873456418516,
"signal/frontier_coverage_15/centered_abs_mean": 0.06695376113057136,
"signal/frontier_coverage_15/group_std_mean": 0.08413880467414855,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013179291039705276,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009574387571774423,
"signal/frontier_coverage_20/centered_abs_mean": 0.09091886132955551,
"signal/frontier_coverage_20/group_std_mean": 0.11494339257478714,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01790587417781353,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013001396786421538,
"signal/frontier_coverage_25/centered_abs_mean": 0.13444166928529738,
"signal/frontier_coverage_25/group_std_mean": 0.1717398762702942,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.026477331668138503,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001922515919432044,
"signal/frontier_coverage_5/centered_abs_mean": 0.19062730073928832,
"signal/frontier_coverage_5/group_std_mean": 0.24686557352542876,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.037577494978904724,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027259701397269963,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33337036371231077,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4049807250499725,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4597449839115143,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03333703950047493,
"step": 240
},
{
"calibration/aurc": 0.16900973620820287,
"calibration/batch_distribution_entropy": 0.9771511357937541,
"calibration/buffer_distribution_entropy": 0.9782663947673533,
"calibration/confidence_entropy": 0.4753566342042407,
"calibration/coverage@0%": 0.03663830715532286,
"calibration/coverage@1%": 0.03663830715532286,
"calibration/coverage@10%": 0.4480230148342059,
"calibration/coverage@15%": 0.5379090314136126,
"calibration/coverage@20%": 0.6642861038394414,
"calibration/coverage@25%": 0.7254417539267015,
"calibration/coverage@30%": 0.8279804755671902,
"calibration/coverage@5%": 0.25915139616055843,
"calibration/ece": 0.15252473465314134,
"calibration/mean_confidence": 0.5344061439463352,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003993055555555558,
"completions/max_length": 3985.6,
"completions/max_terminated_length": 3985.6,
"completions/mean_length": 1053.7048828125,
"completions/mean_terminated_length": 1057.9298950195312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 314.8,
"epoch": 0.5879926500918738,
"grad_norm": 0.0036573780234903097,
"learning_rate": 4.777644230769231e-06,
"loss": -0.0107,
"num_tokens": 528848419.0,
"reward": 1.0006993889808655,
"reward_std": 0.11764541864395142,
"rewards/accuracy_reward": 0.6928819417953491,
"rewards/brier_reward": 0.8288188457489014,
"rewards/confidence_uniqueness_reward": 0.9452489256858826,
"rewards/format_reward": 0.9959201335906982,
"rewards/frontier_coverage_0": 0.03839828912168741,
"rewards/frontier_coverage_1": 0.03839828912168741,
"rewards/frontier_coverage_10": 0.04134276360273361,
"rewards/frontier_coverage_15": 0.06121814027428627,
"rewards/frontier_coverage_20": 0.12238069325685501,
"rewards/frontier_coverage_25": 0.20530767738819122,
"rewards/frontier_coverage_5": 0.038481189869344234,
"rewards/frontier_entropy_batch_reward": -0.2890947461128235,
"signal/accuracy_reward/centered_abs_mean": 0.14274088740348817,
"signal/accuracy_reward/group_std_mean": 0.1888882637023926,
"signal/accuracy_reward/group_zero_std_frac": 0.45833333134651183,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0081943988800048,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07137044370174409,
"signal/advantage_abs_mean": 0.7547778844833374,
"signal/advantage_pre_scale_abs_mean": 0.0882973000407219,
"signal/advantage_pre_scale_std": 0.14071423560380936,
"signal/advantage_std": 0.9830685734748841,
"signal/brier_reward/centered_abs_mean": 0.12028724402189254,
"signal/brier_reward/group_std_mean": 0.15494127571582794,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17019274830818176,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01202872470021248,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020508787035942076,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03213195875287056,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028992549702525138,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020508787129074335,
"signal/format_reward/centered_abs_mean": 0.00753580741584301,
"signal/format_reward/group_std_mean": 0.016222146898508073,
"signal/format_reward/group_zero_std_frac": 0.9277777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05308457799255848,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.003767903707921505,
"signal/frontier_coverage_0/centered_abs_mean": 0.16301291882991792,
"signal/frontier_coverage_0/group_std_mean": 0.2091508388519287,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03296785391867161,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002331084804609418,
"signal/frontier_coverage_1/centered_abs_mean": 0.16301291882991792,
"signal/frontier_coverage_1/group_std_mean": 0.2091508388519287,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03296785391867161,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002331084804609418,
"signal/frontier_coverage_10/centered_abs_mean": 0.12608958184719085,
"signal/frontier_coverage_10/group_std_mean": 0.16320188641548156,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.025517260655760765,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018030810402706265,
"signal/frontier_coverage_15/centered_abs_mean": 0.06297002360224724,
"signal/frontier_coverage_15/group_std_mean": 0.07902814149856567,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0127723790705204,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009004713268950581,
"signal/frontier_coverage_20/centered_abs_mean": 0.09279530793428421,
"signal/frontier_coverage_20/group_std_mean": 0.11791681945323944,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0188205661252141,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013269728980958462,
"signal/frontier_coverage_25/centered_abs_mean": 0.13788617104291917,
"signal/frontier_coverage_25/group_std_mean": 0.17627765834331513,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02795053906738758,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001971772196702659,
"signal/frontier_coverage_5/centered_abs_mean": 0.16277010440826417,
"signal/frontier_coverage_5/group_std_mean": 0.2088464915752411,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032919974997639656,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023276124149560927,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3240995168685913,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3907240152359009,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4595638155937195,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03240995183587074,
"step": 245
},
{
"calibration/aurc": 0.19836911869531343,
"calibration/batch_distribution_entropy": 0.9675209434010308,
"calibration/buffer_distribution_entropy": 0.9786828224775217,
"calibration/confidence_entropy": 0.4952140725739983,
"calibration/coverage@0%": 0.05794984769364665,
"calibration/coverage@1%": 0.08040415578764143,
"calibration/coverage@10%": 0.28509573542210614,
"calibration/coverage@15%": 0.4285329634464752,
"calibration/coverage@20%": 0.5708333333333334,
"calibration/coverage@25%": 0.7375,
"calibration/coverage@30%": 0.7755208333333333,
"calibration/coverage@5%": 0.17335454743255005,
"calibration/ece": 0.22776192901028072,
"calibration/mean_confidence": 0.5247643825024477,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004861111111111138,
"completions/max_length": 3758.6,
"completions/max_terminated_length": 3758.6,
"completions/mean_length": 1066.9283203125,
"completions/mean_terminated_length": 1072.1962158203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 466.8,
"epoch": 0.5999925000937488,
"grad_norm": 0.0039780340157449245,
"learning_rate": 4.747596153846154e-06,
"loss": -0.0083,
"num_tokens": 544243433.0,
"reward": 0.9987628102302551,
"reward_std": 0.11265359967947006,
"rewards/accuracy_reward": 0.6972222328186035,
"rewards/brier_reward": 0.8149329781532287,
"rewards/confidence_uniqueness_reward": 0.9435397028923035,
"rewards/format_reward": 0.9951388835906982,
"rewards/frontier_coverage_0": 0.030471760779619217,
"rewards/frontier_coverage_1": 0.030471760779619217,
"rewards/frontier_coverage_10": 0.032759527862071994,
"rewards/frontier_coverage_15": 0.06044907793402672,
"rewards/frontier_coverage_20": 0.1200255960226059,
"rewards/frontier_coverage_25": 0.19917434453964233,
"rewards/frontier_coverage_5": 0.030477907881140708,
"rewards/frontier_entropy_batch_reward": -0.3046976327896118,
"signal/accuracy_reward/centered_abs_mean": 0.13567708283662797,
"signal/accuracy_reward/group_std_mean": 0.176465305685997,
"signal/accuracy_reward/group_zero_std_frac": 0.5000000119209289,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0347381114959717,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06783854141831398,
"signal/advantage_abs_mean": 0.762245523929596,
"signal/advantage_pre_scale_abs_mean": 0.0862567737698555,
"signal/advantage_pre_scale_std": 0.1369766414165497,
"signal/advantage_std": 0.982957637310028,
"signal/brier_reward/centered_abs_mean": 0.12140593230724335,
"signal/brier_reward/group_std_mean": 0.15745844841003417,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1853317677974701,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012140593118965625,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0218271866440773,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03432033360004425,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03319373317062855,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021827186457812784,
"signal/format_reward/centered_abs_mean": 0.008181423833593725,
"signal/format_reward/group_std_mean": 0.017716386914253236,
"signal/format_reward/group_zero_std_frac": 0.9166666746139527,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0609793234616518,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004090711916796863,
"signal/frontier_coverage_0/centered_abs_mean": 0.1704305589199066,
"signal/frontier_coverage_0/group_std_mean": 0.22056526243686675,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03730859383940697,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024371568579226732,
"signal/frontier_coverage_1/centered_abs_mean": 0.1704305589199066,
"signal/frontier_coverage_1/group_std_mean": 0.22056526243686675,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03730859383940697,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024371568579226732,
"signal/frontier_coverage_10/centered_abs_mean": 0.1306050345301628,
"signal/frontier_coverage_10/group_std_mean": 0.16988409161567689,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.028580862656235696,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018676520558074117,
"signal/frontier_coverage_15/centered_abs_mean": 0.06310615763068199,
"signal/frontier_coverage_15/group_std_mean": 0.07897710651159287,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013806315325200557,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009024180588312447,
"signal/frontier_coverage_20/centered_abs_mean": 0.08676007241010666,
"signal/frontier_coverage_20/group_std_mean": 0.10938064604997635,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018959224969148637,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012406690511852503,
"signal/frontier_coverage_25/centered_abs_mean": 0.12573317885398866,
"signal/frontier_coverage_25/group_std_mean": 0.1593530297279358,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027454627305269243,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017979845171794296,
"signal/frontier_coverage_5/centered_abs_mean": 0.17006706297397614,
"signal/frontier_coverage_5/group_std_mean": 0.22009154856204988,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.037230221554636955,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002431959193199873,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32585279941558837,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39470202326774595,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4988634824752808,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03258528187870979,
"step": 250
},
{
"epoch": 0.5999925000937488,
"eval_calibration/aurc": 0.1385071168629361,
"eval_calibration/batch_distribution_entropy": 0.9366491473344589,
"eval_calibration/buffer_distribution_entropy": 0.9790936212740405,
"eval_calibration/confidence_entropy": 0.4766008180966541,
"eval_calibration/coverage@0%": 0.3082997311827957,
"eval_calibration/coverage@1%": 0.3082997311827957,
"eval_calibration/coverage@10%": 0.5120967741935484,
"eval_calibration/coverage@15%": 0.597614247311828,
"eval_calibration/coverage@20%": 0.760752688172043,
"eval_calibration/coverage@25%": 0.8610551075268816,
"eval_calibration/coverage@30%": 0.946236559139785,
"eval_calibration/coverage@5%": 0.3082997311827957,
"eval_calibration/ece": 0.25818358534946234,
"eval_calibration/mean_confidence": 0.5132839549731183,
"eval_completions/clipped_ratio": 0.006944444444444457,
"eval_completions/max_length": 3228.1666666666665,
"eval_completions/max_terminated_length": 3228.1666666666665,
"eval_completions/mean_length": 1050.439961751302,
"eval_completions/mean_terminated_length": 1057.86083984375,
"eval_completions/min_length": 93.66666666666667,
"eval_completions/min_terminated_length": 480.6666666666667,
"eval_loss": 0.0,
"eval_num_tokens": 544243433.0,
"eval_reward": 0.9110045929749807,
"eval_reward_std": 0.2316128040353457,
"eval_rewards/accuracy_reward": 0.6814236144224802,
"eval_rewards/brier_reward": 0.7850200235843658,
"eval_rewards/confidence_uniqueness_reward": 0.8943492472171783,
"eval_rewards/format_reward": 0.9921875099341074,
"eval_rewards/frontier_coverage_0": 0.014424265439932546,
"eval_rewards/frontier_coverage_1": 0.014424265439932546,
"eval_rewards/frontier_coverage_10": 0.018757762853056192,
"eval_rewards/frontier_coverage_15": 0.05081125907599926,
"eval_rewards/frontier_coverage_20": 0.10140267262856166,
"eval_rewards/frontier_coverage_25": 0.16897361477216086,
"eval_rewards/frontier_coverage_5": 0.014481973213454088,
"eval_rewards/frontier_entropy_batch_reward": -0.9921875099341074,
"eval_runtime": 211.4128,
"eval_samples_per_second": 4.73,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4216037342945735,
"eval_signal/accuracy_reward/group_std_mean": 0.46546362340450287,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9207923909028372,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21080186714728674,
"eval_signal/advantage_abs_mean": 0.8664587438106537,
"eval_signal/advantage_pre_scale_abs_mean": 0.20114790399869284,
"eval_signal/advantage_pre_scale_std": 0.2299346203605334,
"eval_signal/advantage_std": 0.9863927960395813,
"eval_signal/brier_reward/centered_abs_mean": 0.20396561920642853,
"eval_signal/brier_reward/group_std_mean": 0.25821878264347714,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08907666057348251,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.020396563224494457,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.045910464599728584,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07307459662357967,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020038395809630554,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004591046599671245,
"eval_signal/format_reward/centered_abs_mean": 0.015136718439559141,
"eval_signal/format_reward/group_std_mean": 0.044194173688689865,
"eval_signal/format_reward/group_zero_std_frac": 0.7500000298023224,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.03261481939504544,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359219779571,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.3264067123333613,
"eval_signal/frontier_coverage_0/group_std_mean": 0.4418923109769821,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.020408068783581257,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0046676161388556165,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3264067123333613,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4418923109769821,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.020408068783581257,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0046676161388556165,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.24455400804678598,
"eval_signal/frontier_coverage_10/group_std_mean": 0.3390832841396332,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015295245063801607,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034971223988880715,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.08623725920915604,
"eval_signal/frontier_coverage_15/group_std_mean": 0.10872507840394974,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005391905394693215,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012331928010098636,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.13774426033099493,
"eval_signal/frontier_coverage_20/group_std_mean": 0.17580651740233103,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.008605041385938724,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019697428409320614,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.22903726249933243,
"eval_signal/frontier_coverage_25/group_std_mean": 0.2837483336528142,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01430831989273429,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00327523285523057,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.325265496969223,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4404994646708171,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02033673506230116,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0046512965733806295,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.015136718439559141,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.044194173688689865,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.7500000298023224,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.006522963910053174,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0015136719254466395,
"eval_steps_per_second": 0.028,
"step": 250
},
{
"epoch": 0.5999925000937488,
"step": 250,
"train_probe_calibration/aurc": 0.14293172797978523,
"train_probe_calibration/batch_distribution_entropy": 0.9377545299480441,
"train_probe_calibration/buffer_distribution_entropy": 0.9791906702728134,
"train_probe_calibration/confidence_entropy": 0.46180745643026194,
"train_probe_calibration/coverage@0%": 0.19027777777777777,
"train_probe_calibration/coverage@1%": 0.19027777777777777,
"train_probe_calibration/coverage@10%": 0.4875,
"train_probe_calibration/coverage@15%": 0.68125,
"train_probe_calibration/coverage@20%": 0.8065972222222223,
"train_probe_calibration/coverage@25%": 0.9270833333333334,
"train_probe_calibration/coverage@30%": 0.984375,
"train_probe_calibration/coverage@5%": 0.2569444444444444,
"train_probe_calibration/ece": 0.2584190625,
"train_probe_calibration/mean_confidence": 0.5207807291666667,
"train_probe_completions/clipped_ratio": 0.00434027777777779,
"train_probe_completions/max_length": 3085.3333333333335,
"train_probe_completions/max_terminated_length": 3085.3333333333335,
"train_probe_completions/mean_length": 1064.1022847493489,
"train_probe_completions/mean_terminated_length": 1068.6518046061199,
"train_probe_completions/min_length": 169.5,
"train_probe_completions/min_terminated_length": 459.3333333333333,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 544243433.0,
"train_probe_reward": 0.9264779885609945,
"train_probe_reward_std": 0.22200091928243637,
"train_probe_rewards/accuracy_reward": 0.7013888855775198,
"train_probe_rewards/brier_reward": 0.8103241423765818,
"train_probe_rewards/confidence_uniqueness_reward": 0.8954811990261078,
"train_probe_rewards/format_reward": 0.9956597288449606,
"train_probe_rewards/frontier_coverage_0": 0.024293637834489346,
"train_probe_rewards/frontier_coverage_1": 0.024293637834489346,
"train_probe_rewards/frontier_coverage_10": 0.03029835526831448,
"train_probe_rewards/frontier_coverage_15": 0.06300980473558108,
"train_probe_rewards/frontier_coverage_20": 0.12049084653457005,
"train_probe_rewards/frontier_coverage_25": 0.1985230545202891,
"train_probe_rewards/frontier_coverage_5": 0.024340201790134113,
"train_probe_rewards/frontier_entropy_batch_reward": -0.9956597288449606,
"train_probe_runtime": 197.6873,
"train_probe_samples_per_second": 5.058,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4080946147441864,
"train_probe_signal/accuracy_reward/group_std_mean": 0.457661638657252,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.930666039387385,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2040473073720932,
"train_probe_signal/advantage_abs_mean": 0.8634511530399323,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.19239966322978339,
"train_probe_signal/advantage_pre_scale_std": 0.22050043443838754,
"train_probe_signal/advantage_std": 0.9863737424214681,
"train_probe_signal/brier_reward/centered_abs_mean": 0.18432209392388663,
"train_probe_signal/brier_reward/group_std_mean": 0.237020214398702,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08412475387255351,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.01843220926821232,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.045412225648760796,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.062134902303417526,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02067517675459385,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045412226269642515,
"train_probe_signal/format_reward/centered_abs_mean": 0.008300781094779571,
"train_probe_signal/format_reward/group_std_mean": 0.021562910017867882,
"train_probe_signal/format_reward/group_zero_std_frac": 0.8888889153798422,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.018439628494282562,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.004150390547389786,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.31929043928782147,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.4300284336010615,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.020870385070641834,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004565853159874678,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.31929043928782147,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.4300284336010615,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.020870385070641834,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004565853159874678,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.23945064842700958,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.32852159440517426,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015652922447770834,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034241442335769534,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.08496188372373581,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.10551841805378596,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005550025108580788,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012149549438618124,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.14249480267365774,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.1771022950609525,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.009304065412531296,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020376756826105216,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.23307730754216513,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.28281400601069134,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015215486288070679,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003333005510891477,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.31822994848092395,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.4287427266438802,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.020800404871503513,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00455068820156157,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.008300781094779571,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.021562910017867882,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8888889153798422,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.003687925481547912,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0008300781094779571,
"train_probe_steps_per_second": 0.03
},
{
"calibration/aurc": 0.15048834529617455,
"calibration/batch_distribution_entropy": 0.9338231344693714,
"calibration/buffer_distribution_entropy": 0.9794221749818541,
"calibration/confidence_entropy": 0.4710076533233304,
"calibration/coverage@0%": 0.02302989293558958,
"calibration/coverage@1%": 0.02302989293558958,
"calibration/coverage@10%": 0.5242672934797624,
"calibration/coverage@15%": 0.6491196556844868,
"calibration/coverage@20%": 0.6915409155270065,
"calibration/coverage@25%": 0.8355510084265783,
"calibration/coverage@30%": 0.897499654648432,
"calibration/coverage@5%": 0.27028120183611315,
"calibration/ece": 0.14773781862187638,
"calibration/mean_confidence": 0.6141524500746167,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006336805555555558,
"completions/max_length": 3891.2,
"completions/max_terminated_length": 3891.2,
"completions/mean_length": 1071.4462036132813,
"completions/mean_terminated_length": 1078.306103515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 387.2,
"epoch": 0.6119923500956238,
"grad_norm": 0.003938134294003248,
"learning_rate": 4.7175480769230775e-06,
"loss": -0.0119,
"num_tokens": 559683901.0,
"reward": 0.991357171535492,
"reward_std": 0.11556299179792404,
"rewards/accuracy_reward": 0.6925347328186036,
"rewards/brier_reward": 0.812757420539856,
"rewards/confidence_uniqueness_reward": 0.939787495136261,
"rewards/format_reward": 0.9936631917953491,
"rewards/frontier_coverage_0": 0.02287477208301425,
"rewards/frontier_coverage_1": 0.02287477208301425,
"rewards/frontier_coverage_10": 0.027250152826309205,
"rewards/frontier_coverage_15": 0.0626752346754074,
"rewards/frontier_coverage_20": 0.12636226266622544,
"rewards/frontier_coverage_25": 0.2056520938873291,
"rewards/frontier_coverage_5": 0.022994631039910018,
"rewards/frontier_entropy_batch_reward": -0.34013040363788605,
"signal/accuracy_reward/centered_abs_mean": 0.1337456613779068,
"signal/accuracy_reward/group_std_mean": 0.18114876449108125,
"signal/accuracy_reward/group_zero_std_frac": 0.4638888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.009685182571411,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0668728306889534,
"signal/advantage_abs_mean": 0.7468565940856934,
"signal/advantage_pre_scale_abs_mean": 0.08542105257511139,
"signal/advantage_pre_scale_std": 0.14232320189476014,
"signal/advantage_std": 0.9829666376113891,
"signal/brier_reward/centered_abs_mean": 0.12079736739397048,
"signal/brier_reward/group_std_mean": 0.1556779623031616,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1833444505929947,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012079737335443496,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025875761359930038,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03979103080928326,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03993023969233036,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025875761872157454,
"signal/format_reward/centered_abs_mean": 0.011170790065079927,
"signal/format_reward/group_std_mean": 0.02186266928911209,
"signal/format_reward/group_zero_std_frac": 0.9083333492279053,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08671931773424149,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005585395032539964,
"signal/frontier_coverage_0/centered_abs_mean": 0.15731475353240967,
"signal/frontier_coverage_0/group_std_mean": 0.2038748413324356,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.033884177729487416,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022496009478345513,
"signal/frontier_coverage_1/centered_abs_mean": 0.15731475353240967,
"signal/frontier_coverage_1/group_std_mean": 0.2038748413324356,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.033884177729487416,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022496009478345513,
"signal/frontier_coverage_10/centered_abs_mean": 0.1202172502875328,
"signal/frontier_coverage_10/group_std_mean": 0.15625946074724198,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02589981146156788,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001719106617383659,
"signal/frontier_coverage_15/centered_abs_mean": 0.06376026198267937,
"signal/frontier_coverage_15/group_std_mean": 0.07974331229925155,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013887671194970608,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009117717272602022,
"signal/frontier_coverage_20/centered_abs_mean": 0.09358802139759063,
"signal/frontier_coverage_20/group_std_mean": 0.1184041753411293,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020462489500641824,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013383086305111646,
"signal/frontier_coverage_25/centered_abs_mean": 0.13497862517833709,
"signal/frontier_coverage_25/group_std_mean": 0.17217137515544892,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02949381247162819,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019301943248137832,
"signal/frontier_coverage_5/centered_abs_mean": 0.1568644642829895,
"signal/frontier_coverage_5/group_std_mean": 0.2033059686422348,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03378809839487076,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002243161806836724,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3343395471572876,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40089446902275083,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5089798927307129,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033433955162763596,
"step": 255
},
{
"calibration/aurc": 0.10255353201496739,
"calibration/batch_distribution_entropy": 0.9639092103771155,
"calibration/buffer_distribution_entropy": 0.9797833697904046,
"calibration/confidence_entropy": 0.49060049661677196,
"calibration/coverage@0%": 0.10018920337024192,
"calibration/coverage@1%": 0.1544279435791192,
"calibration/coverage@10%": 0.5559179569224634,
"calibration/coverage@15%": 0.7529552754007864,
"calibration/coverage@20%": 0.8581241780108335,
"calibration/coverage@25%": 0.9133015753791163,
"calibration/coverage@30%": 0.9649038163370701,
"calibration/coverage@5%": 0.4035025552714583,
"calibration/ece": 0.17171925216525696,
"calibration/mean_confidence": 0.5537974322635033,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01137152777777779,
"completions/max_length": 3994.8,
"completions/max_terminated_length": 3994.8,
"completions/mean_length": 1101.7650268554687,
"completions/mean_terminated_length": 1114.241162109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 329.8,
"epoch": 0.6239922000974988,
"grad_norm": 0.0035454921890050173,
"learning_rate": 4.6875000000000004e-06,
"loss": -0.0275,
"num_tokens": 575475466.0,
"reward": 1.003694999217987,
"reward_std": 0.12208217233419419,
"rewards/accuracy_reward": 0.7048611164093017,
"rewards/brier_reward": 0.8128533840179444,
"rewards/confidence_uniqueness_reward": 0.9410479426383972,
"rewards/format_reward": 0.9886284589767456,
"rewards/frontier_coverage_0": 0.022731726244091987,
"rewards/frontier_coverage_1": 0.022731726244091987,
"rewards/frontier_coverage_10": 0.031263113394379614,
"rewards/frontier_coverage_15": 0.06570351123809814,
"rewards/frontier_coverage_20": 0.12733635306358337,
"rewards/frontier_coverage_25": 0.20722153186798095,
"rewards/frontier_coverage_5": 0.02290651835501194,
"rewards/frontier_entropy_batch_reward": -0.2558844447135925,
"signal/accuracy_reward/centered_abs_mean": 0.13758680671453477,
"signal/accuracy_reward/group_std_mean": 0.18783430457115174,
"signal/accuracy_reward/group_zero_std_frac": 0.44166666865348814,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0014619827270508,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06879340335726739,
"signal/advantage_abs_mean": 0.7372382164001465,
"signal/advantage_pre_scale_abs_mean": 0.08913364708423614,
"signal/advantage_pre_scale_std": 0.14821802377700805,
"signal/advantage_std": 0.9830303311347961,
"signal/brier_reward/centered_abs_mean": 0.13460603803396226,
"signal/brier_reward/group_std_mean": 0.17414171397686004,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1958859771490097,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013460604101419449,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02883324772119522,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04651344493031502,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.042122557386755945,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002883324818685651,
"signal/format_reward/centered_abs_mean": 0.018267144076526166,
"signal/format_reward/group_std_mean": 0.03373241238296032,
"signal/format_reward/group_zero_std_frac": 0.8611111164093017,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.13340308517217636,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009133572038263083,
"signal/frontier_coverage_0/centered_abs_mean": 0.18028615415096283,
"signal/frontier_coverage_0/group_std_mean": 0.2340587854385376,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03751359954476356,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025780918542295693,
"signal/frontier_coverage_1/centered_abs_mean": 0.18028615415096283,
"signal/frontier_coverage_1/group_std_mean": 0.2340587854385376,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03751359954476356,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025780918542295693,
"signal/frontier_coverage_10/centered_abs_mean": 0.1330754965543747,
"signal/frontier_coverage_10/group_std_mean": 0.1743028372526169,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.027698947116732597,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019029794726520776,
"signal/frontier_coverage_15/centered_abs_mean": 0.0681751549243927,
"signal/frontier_coverage_15/group_std_mean": 0.08557239919900894,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014194411225616932,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009749047341756523,
"signal/frontier_coverage_20/centered_abs_mean": 0.09594286382198333,
"signal/frontier_coverage_20/group_std_mean": 0.12045165747404099,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019987112656235696,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013719829730689526,
"signal/frontier_coverage_25/centered_abs_mean": 0.1361988067626953,
"signal/frontier_coverage_25/group_std_mean": 0.17252913117408752,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028375216573476792,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019476428860798478,
"signal/frontier_coverage_5/centered_abs_mean": 0.17972335517406463,
"signal/frontier_coverage_5/group_std_mean": 0.23334594070911407,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.037396402657032014,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025700438302010296,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30619403123855593,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3734066069126129,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4467845559120178,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03061940483748913,
"step": 260
},
{
"calibration/aurc": 0.12449450690978159,
"calibration/batch_distribution_entropy": 0.9773936708097445,
"calibration/buffer_distribution_entropy": 0.9805711500932638,
"calibration/confidence_entropy": 0.48593347695128186,
"calibration/coverage@0%": 0.08784057472010717,
"calibration/coverage@1%": 0.08784057472010717,
"calibration/coverage@10%": 0.587357013439868,
"calibration/coverage@15%": 0.6641879363001746,
"calibration/coverage@20%": 0.778266797556719,
"calibration/coverage@25%": 0.8677356020942408,
"calibration/coverage@30%": 0.8980339223385689,
"calibration/coverage@5%": 0.35331105411489167,
"calibration/ece": 0.18071845532152359,
"calibration/mean_confidence": 0.555744081252791,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00894097222222221,
"completions/max_length": 3896.4,
"completions/max_terminated_length": 3896.4,
"completions/mean_length": 1038.3904541015625,
"completions/mean_terminated_length": 1047.8605834960938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 294.6,
"epoch": 0.6359920500993738,
"grad_norm": 0.0030857024248689413,
"learning_rate": 4.657451923076923e-06,
"loss": -0.0334,
"num_tokens": 590507548.0,
"reward": 1.000011420249939,
"reward_std": 0.11584964096546173,
"rewards/accuracy_reward": 0.6975694417953491,
"rewards/brier_reward": 0.8233382225036621,
"rewards/confidence_uniqueness_reward": 0.9414842247962951,
"rewards/format_reward": 0.9910590410232544,
"rewards/frontier_coverage_0": 0.034576449729502204,
"rewards/frontier_coverage_1": 0.034576449729502204,
"rewards/frontier_coverage_10": 0.04155855402350426,
"rewards/frontier_coverage_15": 0.07075799554586411,
"rewards/frontier_coverage_20": 0.1364063397049904,
"rewards/frontier_coverage_25": 0.2174540102481842,
"rewards/frontier_coverage_5": 0.03471278678625822,
"rewards/frontier_entropy_batch_reward": -0.2893666684627533,
"signal/accuracy_reward/centered_abs_mean": 0.1317057266831398,
"signal/accuracy_reward/group_std_mean": 0.17435995638370513,
"signal/accuracy_reward/group_zero_std_frac": 0.5027777969837188,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9813842535018921,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0658528633415699,
"signal/advantage_abs_mean": 0.7615089535713195,
"signal/advantage_pre_scale_abs_mean": 0.08686984926462174,
"signal/advantage_pre_scale_std": 0.14367577582597732,
"signal/advantage_std": 0.9829856991767884,
"signal/brier_reward/centered_abs_mean": 0.12998353093862533,
"signal/brier_reward/group_std_mean": 0.16700247824192047,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19465568661689758,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012998353131115437,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027591006830334663,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0436860203742981,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.041278140246868135,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027591006364673376,
"signal/format_reward/centered_abs_mean": 0.015413411241024733,
"signal/format_reward/group_std_mean": 0.0289100106805563,
"signal/format_reward/group_zero_std_frac": 0.8805555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.11416576504707336,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007706705620512367,
"signal/frontier_coverage_0/centered_abs_mean": 0.17794593572616577,
"signal/frontier_coverage_0/group_std_mean": 0.22968710362911224,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.038241232931613925,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002544626779854298,
"signal/frontier_coverage_1/centered_abs_mean": 0.17794593572616577,
"signal/frontier_coverage_1/group_std_mean": 0.22968710362911224,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.038241232931613925,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002544626779854298,
"signal/frontier_coverage_10/centered_abs_mean": 0.11253818869590759,
"signal/frontier_coverage_10/group_std_mean": 0.1477993905544281,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.024206925183534622,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016092960722744465,
"signal/frontier_coverage_15/centered_abs_mean": 0.06968608945608139,
"signal/frontier_coverage_15/group_std_mean": 0.0868400439620018,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01497387420386076,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009965110919438302,
"signal/frontier_coverage_20/centered_abs_mean": 0.09935293346643448,
"signal/frontier_coverage_20/group_std_mean": 0.12432914227247238,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021300822868943213,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014207469765096902,
"signal/frontier_coverage_25/centered_abs_mean": 0.1397058293223381,
"signal/frontier_coverage_25/group_std_mean": 0.17603937685489654,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029925085604190826,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00199779337272048,
"signal/frontier_coverage_5/centered_abs_mean": 0.17707998752593995,
"signal/frontier_coverage_5/group_std_mean": 0.22862663865089417,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03805320970714092,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002532243775203824,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3300370931625366,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39845150113105776,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.496671199798584,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03300370946526528,
"step": 265
},
{
"calibration/aurc": 0.12281177092511357,
"calibration/batch_distribution_entropy": 0.9656192498774155,
"calibration/buffer_distribution_entropy": 0.9823104864801895,
"calibration/confidence_entropy": 0.47402582816426486,
"calibration/coverage@0%": 0.03688186664003849,
"calibration/coverage@1%": 0.03688186664003849,
"calibration/coverage@10%": 0.49601819978997863,
"calibration/coverage@15%": 0.6787215172399611,
"calibration/coverage@20%": 0.8241145702041397,
"calibration/coverage@25%": 0.8999529551348966,
"calibration/coverage@30%": 0.9731249559051784,
"calibration/coverage@5%": 0.28127246150857715,
"calibration/ece": 0.17982517686639335,
"calibration/mean_confidence": 0.5375241826053218,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012413194444444442,
"completions/max_length": 4045.4,
"completions/max_terminated_length": 4045.4,
"completions/mean_length": 991.3185913085938,
"completions/mean_terminated_length": 1003.8561401367188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 296.0,
"epoch": 0.6479919001012487,
"grad_norm": 0.002968569053336978,
"learning_rate": 4.627403846153847e-06,
"loss": -0.0261,
"num_tokens": 605047570.0,
"reward": 1.0034834623336792,
"reward_std": 0.11915335059165955,
"rewards/accuracy_reward": 0.7129340291023254,
"rewards/brier_reward": 0.8068234920501709,
"rewards/confidence_uniqueness_reward": 0.9387187480926513,
"rewards/format_reward": 0.987500011920929,
"rewards/frontier_coverage_0": 0.01569048868259415,
"rewards/frontier_coverage_1": 0.01569048868259415,
"rewards/frontier_coverage_10": 0.03181662876158953,
"rewards/frontier_coverage_15": 0.07589756101369857,
"rewards/frontier_coverage_20": 0.14337013214826583,
"rewards/frontier_coverage_25": 0.2238972157239914,
"rewards/frontier_coverage_5": 0.01603688622417394,
"rewards/frontier_entropy_batch_reward": -0.2875809371471405,
"signal/accuracy_reward/centered_abs_mean": 0.13441297858953477,
"signal/accuracy_reward/group_std_mean": 0.17354630529880524,
"signal/accuracy_reward/group_zero_std_frac": 0.522222238779068,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0317163467407227,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06720648929476739,
"signal/advantage_abs_mean": 0.7717400074005127,
"signal/advantage_pre_scale_abs_mean": 0.0905812844634056,
"signal/advantage_pre_scale_std": 0.15285636186599733,
"signal/advantage_std": 0.9829501986503602,
"signal/brier_reward/centered_abs_mean": 0.13663897514343262,
"signal/brier_reward/group_std_mean": 0.17413173317909242,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21040893495082855,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013663897477090358,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030733636021614073,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04751450791954994,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0473335437476635,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003073363611474633,
"signal/format_reward/centered_abs_mean": 0.01868489533662796,
"signal/format_reward/group_std_mean": 0.032993590086698533,
"signal/format_reward/group_zero_std_frac": 0.8694444417953491,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.14405927509069444,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00934244766831398,
"signal/frontier_coverage_0/centered_abs_mean": 0.1771648645401001,
"signal/frontier_coverage_0/group_std_mean": 0.23024407625198365,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03890465572476387,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025334575679153205,
"signal/frontier_coverage_1/centered_abs_mean": 0.1771648645401001,
"signal/frontier_coverage_1/group_std_mean": 0.23024407625198365,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03890465572476387,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025334575679153205,
"signal/frontier_coverage_10/centered_abs_mean": 0.10589916706085205,
"signal/frontier_coverage_10/group_std_mean": 0.13918049335479737,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02323727458715439,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001514358026906848,
"signal/frontier_coverage_15/centered_abs_mean": 0.0736978754401207,
"signal/frontier_coverage_15/group_std_mean": 0.09158166199922561,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01622140742838383,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001053879619576037,
"signal/frontier_coverage_20/centered_abs_mean": 0.10349708944559097,
"signal/frontier_coverage_20/group_std_mean": 0.13017223328351973,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02279331013560295,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001480008359067142,
"signal/frontier_coverage_25/centered_abs_mean": 0.14353952705860137,
"signal/frontier_coverage_25/group_std_mean": 0.18179913461208344,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03160831183195114,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020526152104139326,
"signal/frontier_coverage_5/centered_abs_mean": 0.17637761533260346,
"signal/frontier_coverage_5/group_std_mean": 0.2292585015296936,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03873150199651718,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025221999734640122,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32779831886291505,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39781845808029176,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5039668500423431,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03277983292937279,
"step": 270
},
{
"calibration/aurc": 0.14417500428320207,
"calibration/batch_distribution_entropy": 0.9710154425145918,
"calibration/buffer_distribution_entropy": 0.9824627731736181,
"calibration/confidence_entropy": 0.4968844676980586,
"calibration/coverage@0%": 0.055456773076542884,
"calibration/coverage@1%": 0.055456773076542884,
"calibration/coverage@10%": 0.43402523337092347,
"calibration/coverage@15%": 0.6135070800685314,
"calibration/coverage@20%": 0.7761960929383052,
"calibration/coverage@25%": 0.872591220602526,
"calibration/coverage@30%": 0.9412395657627265,
"calibration/coverage@5%": 0.22296472683507887,
"calibration/ece": 0.13625054238405615,
"calibration/mean_confidence": 0.5763414081845614,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02734375,
"completions/max_length": 4069.8,
"completions/max_terminated_length": 4069.8,
"completions/mean_length": 1096.324658203125,
"completions/mean_terminated_length": 1127.08369140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 310.4,
"epoch": 0.6599917501031237,
"grad_norm": 0.0023030159063637257,
"learning_rate": 4.597355769230769e-06,
"loss": -0.0731,
"num_tokens": 620784142.0,
"reward": 0.9708781123161316,
"reward_std": 0.13851457834243774,
"rewards/accuracy_reward": 0.6647569537162781,
"rewards/brier_reward": 0.7947103142738342,
"rewards/confidence_uniqueness_reward": 0.9273814558982849,
"rewards/format_reward": 0.9723958373069763,
"rewards/frontier_coverage_0": 0.02882022559642792,
"rewards/frontier_coverage_1": 0.028787746839225293,
"rewards/frontier_coverage_10": 0.03391992338001728,
"rewards/frontier_coverage_15": 0.06893513202667237,
"rewards/frontier_coverage_20": 0.12927037924528123,
"rewards/frontier_coverage_25": 0.20069342851638794,
"rewards/frontier_coverage_5": 0.028869583271443844,
"rewards/frontier_entropy_batch_reward": -0.27333354353904726,
"signal/accuracy_reward/centered_abs_mean": 0.1373263880610466,
"signal/accuracy_reward/group_std_mean": 0.18307480216026306,
"signal/accuracy_reward/group_zero_std_frac": 0.4694444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9482576966285705,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0686631940305233,
"signal/advantage_abs_mean": 0.7440701246261596,
"signal/advantage_pre_scale_abs_mean": 0.10121009647846221,
"signal/advantage_pre_scale_std": 0.17259745299816132,
"signal/advantage_std": 0.983104145526886,
"signal/brier_reward/centered_abs_mean": 0.1418377786874771,
"signal/brier_reward/group_std_mean": 0.18231958746910096,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19642621278762817,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014183777570724487,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04775775372982025,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07538097202777863,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06636101678013802,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004775775363668799,
"signal/format_reward/centered_abs_mean": 0.0403211809694767,
"signal/format_reward/group_std_mean": 0.06660629361867905,
"signal/format_reward/group_zero_std_frac": 0.75,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.28085810542106626,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.02016059048473835,
"signal/frontier_coverage_0/centered_abs_mean": 0.18177134394645691,
"signal/frontier_coverage_0/group_std_mean": 0.2336914509534836,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03591709956526756,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002599330106750131,
"signal/frontier_coverage_1/centered_abs_mean": 0.1816892147064209,
"signal/frontier_coverage_1/group_std_mean": 0.23359167873859404,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03590134456753731,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025981557089835407,
"signal/frontier_coverage_10/centered_abs_mean": 0.10146680474281311,
"signal/frontier_coverage_10/group_std_mean": 0.13296806663274766,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.020047903805971146,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001450975309126079,
"signal/frontier_coverage_15/centered_abs_mean": 0.06901453286409379,
"signal/frontier_coverage_15/group_std_mean": 0.08679485768079757,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013635250180959702,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009869078057818115,
"signal/frontier_coverage_20/centered_abs_mean": 0.09845926016569137,
"signal/frontier_coverage_20/group_std_mean": 0.1253434956073761,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019467445090413094,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014079674379900097,
"signal/frontier_coverage_25/centered_abs_mean": 0.1376819759607315,
"signal/frontier_coverage_25/group_std_mean": 0.1762455552816391,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027223770692944526,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019688522443175316,
"signal/frontier_coverage_5/centered_abs_mean": 0.18081392645835875,
"signal/frontier_coverage_5/group_std_mean": 0.23251225650310517,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03572747558355331,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025856390595436094,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32236793637275696,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39191374778747556,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4449836671352386,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03223679475486278,
"step": 275
},
{
"calibration/aurc": 0.09641676239838745,
"calibration/batch_distribution_entropy": 0.9676579395319906,
"calibration/buffer_distribution_entropy": 0.9820208457928654,
"calibration/confidence_entropy": 0.5058189262006992,
"calibration/coverage@0%": 0.06702250595041062,
"calibration/coverage@1%": 0.12578261376712221,
"calibration/coverage@10%": 0.6526884191611103,
"calibration/coverage@15%": 0.76473341845179,
"calibration/coverage@20%": 0.8503245283018869,
"calibration/coverage@25%": 0.9002695417789758,
"calibration/coverage@30%": 0.9428571428571428,
"calibration/coverage@5%": 0.5351988713923174,
"calibration/ece": 0.18722123766397122,
"calibration/mean_confidence": 0.5592271986784572,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03828125,
"completions/max_length": 4062.4,
"completions/max_terminated_length": 4062.4,
"completions/mean_length": 1125.9796997070312,
"completions/mean_terminated_length": 1170.884716796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 323.2,
"epoch": 0.6719916001049987,
"grad_norm": 0.0021780498791486025,
"learning_rate": 4.567307692307692e-06,
"loss": -0.0884,
"num_tokens": 636850724.0,
"reward": 0.9621721744537354,
"reward_std": 0.15094391703605653,
"rewards/accuracy_reward": 0.6716145873069763,
"rewards/brier_reward": 0.7638566136360169,
"rewards/confidence_uniqueness_reward": 0.9151824951171875,
"rewards/format_reward": 0.9613715171813965,
"rewards/frontier_coverage_0": -0.0023802617564797402,
"rewards/frontier_coverage_1": -0.0023802617564797402,
"rewards/frontier_coverage_10": 0.014429821725934744,
"rewards/frontier_coverage_15": 0.05858373343944549,
"rewards/frontier_coverage_20": 0.11382308453321457,
"rewards/frontier_coverage_25": 0.1784544587135315,
"rewards/frontier_coverage_5": -0.0021017659455537796,
"rewards/frontier_entropy_batch_reward": -0.2735038071870804,
"signal/accuracy_reward/centered_abs_mean": 0.14811740517616273,
"signal/accuracy_reward/group_std_mean": 0.19783127307891846,
"signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9213737845420837,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07405870258808137,
"signal/advantage_abs_mean": 0.747747254371643,
"signal/advantage_pre_scale_abs_mean": 0.1122934266924858,
"signal/advantage_pre_scale_std": 0.18739549219608306,
"signal/advantage_std": 0.9832320809364319,
"signal/brier_reward/centered_abs_mean": 0.1451827973127365,
"signal/brier_reward/group_std_mean": 0.18546995520591736,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18125930428504944,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014518279768526553,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05821017548441887,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0866745539009571,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.07207210585474969,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005821018014103174,
"signal/format_reward/centered_abs_mean": 0.0506022147834301,
"signal/format_reward/group_std_mean": 0.07779108807444572,
"signal/format_reward/group_zero_std_frac": 0.7305555701255798,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.3118982821702957,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.02530110739171505,
"signal/frontier_coverage_0/centered_abs_mean": 0.1833457052707672,
"signal/frontier_coverage_0/group_std_mean": 0.2372480094432831,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03273410275578499,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002621843572705984,
"signal/frontier_coverage_1/centered_abs_mean": 0.1833457052707672,
"signal/frontier_coverage_1/group_std_mean": 0.2372480094432831,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03273410275578499,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002621843572705984,
"signal/frontier_coverage_10/centered_abs_mean": 0.09884552955627442,
"signal/frontier_coverage_10/group_std_mean": 0.1297825500369072,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017642829567193985,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014134910656139255,
"signal/frontier_coverage_15/centered_abs_mean": 0.06081449687480926,
"signal/frontier_coverage_15/group_std_mean": 0.07752282023429871,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.010932053439319134,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008696473552845419,
"signal/frontier_coverage_20/centered_abs_mean": 0.08544690757989884,
"signal/frontier_coverage_20/group_std_mean": 0.10949314087629318,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015372510813176633,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012218907242640853,
"signal/frontier_coverage_25/centered_abs_mean": 0.1205233633518219,
"signal/frontier_coverage_25/group_std_mean": 0.15510833263397217,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.021653353795409204,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017234840430319308,
"signal/frontier_coverage_5/centered_abs_mean": 0.18253884911537172,
"signal/frontier_coverage_5/group_std_mean": 0.23624739646911622,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03258874006569386,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026103056035935877,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32768973112106325,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3990603029727936,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4122699022293091,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032768973335623744,
"step": 280
},
{
"calibration/aurc": 0.16161388497444104,
"calibration/batch_distribution_entropy": 0.954786223675052,
"calibration/buffer_distribution_entropy": 0.9822368662432682,
"calibration/confidence_entropy": 0.4893096755488703,
"calibration/coverage@0%": 0.012397262621378627,
"calibration/coverage@1%": 0.012397262621378627,
"calibration/coverage@10%": 0.3451227750696987,
"calibration/coverage@15%": 0.42521536791355874,
"calibration/coverage@20%": 0.8219908877178448,
"calibration/coverage@25%": 0.9104244296288853,
"calibration/coverage@30%": 0.9695187165775401,
"calibration/coverage@5%": 0.15790624465730677,
"calibration/ece": 0.1771468328440237,
"calibration/mean_confidence": 0.6157620534496937,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.06805555555555556,
"completions/max_length": 4061.2,
"completions/max_terminated_length": 4061.2,
"completions/mean_length": 1205.688525390625,
"completions/mean_terminated_length": 1293.82998046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 326.6,
"epoch": 0.6839914501068737,
"grad_norm": 0.0017848203424364328,
"learning_rate": 4.537259615384616e-06,
"loss": -0.1574,
"num_tokens": 653843328.0,
"reward": 0.9423358082771301,
"reward_std": 0.1858820378780365,
"rewards/accuracy_reward": 0.6730902671813965,
"rewards/brier_reward": 0.7743986010551452,
"rewards/confidence_uniqueness_reward": 0.881925082206726,
"rewards/format_reward": 0.9314236164093017,
"rewards/frontier_coverage_0": 0.013644175603985786,
"rewards/frontier_coverage_1": 0.013644175603985786,
"rewards/frontier_coverage_10": 0.02565064523369074,
"rewards/frontier_coverage_15": 0.07387386113405228,
"rewards/frontier_coverage_20": 0.14157648533582687,
"rewards/frontier_coverage_25": 0.2188367635011673,
"rewards/frontier_coverage_5": 0.013734235800802708,
"rewards/frontier_entropy_batch_reward": -0.32717219591140745,
"signal/accuracy_reward/centered_abs_mean": 0.16214192807674407,
"signal/accuracy_reward/group_std_mean": 0.21098495423793792,
"signal/accuracy_reward/group_zero_std_frac": 0.4055555582046509,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9308143734931946,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08107096403837204,
"signal/advantage_abs_mean": 0.7548533916473389,
"signal/advantage_pre_scale_abs_mean": 0.1412857949733734,
"signal/advantage_pre_scale_std": 0.23110412061214447,
"signal/advantage_std": 0.9833352923393249,
"signal/brier_reward/centered_abs_mean": 0.1556008368730545,
"signal/brier_reward/group_std_mean": 0.1984753429889679,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17860327661037445,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015560084208846092,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09533466547727584,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1326120525598526,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.10921618342399597,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009533467143774033,
"signal/format_reward/centered_abs_mean": 0.08937717080116273,
"signal/format_reward/group_std_mean": 0.12585532814264297,
"signal/format_reward/group_zero_std_frac": 0.6083333373069764,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.5117337226867675,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.04468858540058136,
"signal/frontier_coverage_0/centered_abs_mean": 0.15532127320766448,
"signal/frontier_coverage_0/group_std_mean": 0.20312093198299408,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02545154429972172,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002221094211563468,
"signal/frontier_coverage_1/centered_abs_mean": 0.15532127320766448,
"signal/frontier_coverage_1/group_std_mean": 0.20312093198299408,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02545154429972172,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002221094211563468,
"signal/frontier_coverage_10/centered_abs_mean": 0.08915466368198395,
"signal/frontier_coverage_10/group_std_mean": 0.11783302575349808,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014571213349699973,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012749116867780685,
"signal/frontier_coverage_15/centered_abs_mean": 0.0656839594244957,
"signal/frontier_coverage_15/group_std_mean": 0.08419227302074432,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.010779930651187897,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009392806678079069,
"signal/frontier_coverage_20/centered_abs_mean": 0.09882727265357971,
"signal/frontier_coverage_20/group_std_mean": 0.12712617367506027,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016234659403562546,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014132299926131963,
"signal/frontier_coverage_25/centered_abs_mean": 0.14026913046836853,
"signal/frontier_coverage_25/group_std_mean": 0.18065418004989625,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02304657958447933,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020058486377820374,
"signal/frontier_coverage_5/centered_abs_mean": 0.15472302138805388,
"signal/frontier_coverage_5/group_std_mean": 0.2023627281188965,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02535347007215023,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022125390358269216,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3393124520778656,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40504205226898193,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3892597258090973,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0339312456548214,
"step": 285
},
{
"calibration/aurc": 0.14990839978575438,
"calibration/batch_distribution_entropy": 0.9466328987518603,
"calibration/buffer_distribution_entropy": 0.9817441485320545,
"calibration/confidence_entropy": 0.4870812081068852,
"calibration/coverage@0%": 0.022407624505672442,
"calibration/coverage@1%": 0.022407624505672442,
"calibration/coverage@10%": 0.32707504225792317,
"calibration/coverage@15%": 0.5220147394055882,
"calibration/coverage@20%": 0.7693151120990234,
"calibration/coverage@25%": 0.8949514642116956,
"calibration/coverage@30%": 0.9811842627395221,
"calibration/coverage@5%": 0.13891238681756574,
"calibration/ece": 0.11565863919638524,
"calibration/mean_confidence": 0.6153538965509003,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05164930555555556,
"completions/max_length": 4078.0,
"completions/max_terminated_length": 4078.0,
"completions/mean_length": 1220.135107421875,
"completions/mean_terminated_length": 1287.0528076171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 326.6,
"epoch": 0.6959913001087487,
"grad_norm": 0.0017922352999448776,
"learning_rate": 4.507211538461539e-06,
"loss": -0.1127,
"num_tokens": 670995188.0,
"reward": 0.9570554256439209,
"reward_std": 0.1720178633928299,
"rewards/accuracy_reward": 0.6854166626930237,
"rewards/brier_reward": 0.7753046989440918,
"rewards/confidence_uniqueness_reward": 0.8973992824554443,
"rewards/format_reward": 0.9480034589767456,
"rewards/frontier_coverage_0": 0.002874659560620785,
"rewards/frontier_coverage_1": 0.002884892001748085,
"rewards/frontier_coverage_10": 0.015663625486195088,
"rewards/frontier_coverage_15": 0.07120544984936714,
"rewards/frontier_coverage_20": 0.13762863576412201,
"rewards/frontier_coverage_25": 0.21375060081481934,
"rewards/frontier_coverage_5": 0.0030614846386015416,
"rewards/frontier_entropy_batch_reward": -0.3331816494464874,
"signal/accuracy_reward/centered_abs_mean": 0.1560980886220932,
"signal/accuracy_reward/group_std_mean": 0.20737068653106688,
"signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9261637568473816,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0780490443110466,
"signal/advantage_abs_mean": 0.7416696906089782,
"signal/advantage_pre_scale_abs_mean": 0.12750329077243805,
"signal/advantage_pre_scale_std": 0.2124664753675461,
"signal/advantage_std": 0.9832707643508911,
"signal/brier_reward/centered_abs_mean": 0.14848179519176483,
"signal/brier_reward/group_std_mean": 0.19175553023815156,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17806673645973206,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.0148481797426939,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07424457222223282,
"signal/confidence_uniqueness_reward/group_std_mean": 0.10994532108306884,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08886821419000626,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007424457184970379,
"signal/format_reward/centered_abs_mean": 0.06673719584941865,
"signal/format_reward/group_std_mean": 0.10154436230659485,
"signal/format_reward/group_zero_std_frac": 0.6555555701255799,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.39830978512763976,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.03336859792470932,
"signal/frontier_coverage_0/centered_abs_mean": 0.1480401337146759,
"signal/frontier_coverage_0/group_std_mean": 0.1944763779640198,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.025522398948669433,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021169739309698345,
"signal/frontier_coverage_1/centered_abs_mean": 0.1480236232280731,
"signal/frontier_coverage_1/group_std_mean": 0.1944561183452606,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.025519952923059464,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021167377941310405,
"signal/frontier_coverage_10/centered_abs_mean": 0.08222000598907471,
"signal/frontier_coverage_10/group_std_mean": 0.10998225659132004,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014115773141384125,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001175746088847518,
"signal/frontier_coverage_15/centered_abs_mean": 0.06811703145503997,
"signal/frontier_coverage_15/group_std_mean": 0.08626431375741958,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011764644645154476,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009740736102685332,
"signal/frontier_coverage_20/centered_abs_mean": 0.10457015037536621,
"signal/frontier_coverage_20/group_std_mean": 0.13225899040699005,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018054082244634628,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014953531557694078,
"signal/frontier_coverage_25/centered_abs_mean": 0.14872863590717317,
"signal/frontier_coverage_25/group_std_mean": 0.18871353566646576,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.025641126558184624,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002126819547265768,
"signal/frontier_coverage_5/centered_abs_mean": 0.1474991887807846,
"signal/frontier_coverage_5/group_std_mean": 0.19379670023918152,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02542857564985752,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021092384587973355,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33809652328491213,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4022083759307861,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4096614599227905,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03380965441465378,
"step": 290
},
{
"calibration/aurc": 0.16839003318485468,
"calibration/batch_distribution_entropy": 0.9647584219475576,
"calibration/buffer_distribution_entropy": 0.9819259989411554,
"calibration/confidence_entropy": 0.49521471378087234,
"calibration/coverage@0%": 0.031917463514191564,
"calibration/coverage@1%": 0.031917463514191564,
"calibration/coverage@10%": 0.32524698829330323,
"calibration/coverage@15%": 0.4671416904414487,
"calibration/coverage@20%": 0.6470332978310416,
"calibration/coverage@25%": 0.8226688925601093,
"calibration/coverage@30%": 0.8954452772099831,
"calibration/coverage@5%": 0.17303270682289534,
"calibration/ece": 0.17856220266263753,
"calibration/mean_confidence": 0.5398482647393512,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05104166666666667,
"completions/max_length": 4080.8,
"completions/max_terminated_length": 4080.8,
"completions/mean_length": 1236.2116455078126,
"completions/mean_terminated_length": 1303.0637939453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 344.6,
"epoch": 0.7079911501106236,
"grad_norm": 0.001917012850753963,
"learning_rate": 4.477163461538462e-06,
"loss": -0.1196,
"num_tokens": 688321818.0,
"reward": 0.9414116501808166,
"reward_std": 0.1707327514886856,
"rewards/accuracy_reward": 0.6397569417953491,
"rewards/brier_reward": 0.7684574365615845,
"rewards/confidence_uniqueness_reward": 0.9021074414253235,
"rewards/format_reward": 0.9486979246139526,
"rewards/frontier_coverage_0": 0.029088782146573067,
"rewards/frontier_coverage_1": 0.029088782146573067,
"rewards/frontier_coverage_10": 0.02980848792940378,
"rewards/frontier_coverage_15": 0.06237031891942024,
"rewards/frontier_coverage_20": 0.115201236307621,
"rewards/frontier_coverage_25": 0.17910198271274566,
"rewards/frontier_coverage_5": 0.029053746536374093,
"rewards/frontier_entropy_batch_reward": -0.2664636343717575,
"signal/accuracy_reward/centered_abs_mean": 0.15848524272441863,
"signal/accuracy_reward/group_std_mean": 0.20676854848861695,
"signal/accuracy_reward/group_zero_std_frac": 0.4194444417953491,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.967148220539093,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07924262136220932,
"signal/advantage_abs_mean": 0.7537167191505432,
"signal/advantage_pre_scale_abs_mean": 0.12879492938518525,
"signal/advantage_pre_scale_std": 0.2127348393201828,
"signal/advantage_std": 0.9832599520683288,
"signal/brier_reward/centered_abs_mean": 0.1589398592710495,
"signal/brier_reward/group_std_mean": 0.199799045920372,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1937939405441284,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015893985889852046,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07883353978395462,
"signal/confidence_uniqueness_reward/group_std_mean": 0.11305341869592667,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.09597876071929931,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007883354183286428,
"signal/format_reward/centered_abs_mean": 0.07265082374215126,
"signal/format_reward/group_std_mean": 0.10599421262741089,
"signal/format_reward/group_zero_std_frac": 0.6611111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.441804563999176,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.03632541187107563,
"signal/frontier_coverage_0/centered_abs_mean": 0.18514932990074157,
"signal/frontier_coverage_0/group_std_mean": 0.23785326182842254,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.032265615090727805,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00264763543382287,
"signal/frontier_coverage_1/centered_abs_mean": 0.18514932990074157,
"signal/frontier_coverage_1/group_std_mean": 0.23785326182842254,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.032265615090727805,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00264763543382287,
"signal/frontier_coverage_10/centered_abs_mean": 0.101753930747509,
"signal/frontier_coverage_10/group_std_mean": 0.13369067162275314,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017728328704833984,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014550811843946575,
"signal/frontier_coverage_15/centered_abs_mean": 0.0646564818918705,
"signal/frontier_coverage_15/group_std_mean": 0.0819364532828331,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011319943889975547,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009245876688510179,
"signal/frontier_coverage_20/centered_abs_mean": 0.09063917696475983,
"signal/frontier_coverage_20/group_std_mean": 0.11509868800640106,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015911070629954338,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001296140206977725,
"signal/frontier_coverage_25/centered_abs_mean": 0.12784689664840698,
"signal/frontier_coverage_25/group_std_mean": 0.1628873258829117,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02245633341372013,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001828210591338575,
"signal/frontier_coverage_5/centered_abs_mean": 0.184427148103714,
"signal/frontier_coverage_5/group_std_mean": 0.23696613907814026,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03213928528130054,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026373081840574742,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31256569623947145,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3834408223628998,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.38236083984375,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031256569921970366,
"step": 295
},
{
"calibration/aurc": 0.12078319690376119,
"calibration/batch_distribution_entropy": 0.9276962294256655,
"calibration/buffer_distribution_entropy": 0.9821949128055089,
"calibration/confidence_entropy": 0.4829753078536876,
"calibration/coverage@0%": 0.03940227375009563,
"calibration/coverage@1%": 0.03940227375009563,
"calibration/coverage@10%": 0.42528907795356624,
"calibration/coverage@15%": 0.722236752314575,
"calibration/coverage@20%": 0.854863809636678,
"calibration/coverage@25%": 0.9216402608421896,
"calibration/coverage@30%": 0.9679343546671456,
"calibration/coverage@5%": 0.27418103847954345,
"calibration/ece": 0.11320676820814626,
"calibration/mean_confidence": 0.6328128382779166,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.036111111111111115,
"completions/max_length": 4062.0,
"completions/max_terminated_length": 4062.0,
"completions/mean_length": 1195.3615478515626,
"completions/mean_terminated_length": 1240.4994140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 286.8,
"epoch": 0.7199910001124986,
"grad_norm": 0.0018773460760712624,
"learning_rate": 4.447115384615385e-06,
"loss": -0.0981,
"num_tokens": 705193695.0,
"reward": 0.9711301684379577,
"reward_std": 0.15473923683166504,
"rewards/accuracy_reward": 0.6816840291023254,
"rewards/brier_reward": 0.8024370551109314,
"rewards/confidence_uniqueness_reward": 0.9131479620933532,
"rewards/format_reward": 0.9635416746139527,
"rewards/frontier_coverage_0": 0.0319746870547533,
"rewards/frontier_coverage_1": 0.0319746870547533,
"rewards/frontier_coverage_10": 0.036012591794133186,
"rewards/frontier_coverage_15": 0.07915448248386384,
"rewards/frontier_coverage_20": 0.1457726925611496,
"rewards/frontier_coverage_25": 0.22427182495594025,
"rewards/frontier_coverage_5": 0.032045964151620865,
"rewards/frontier_entropy_batch_reward": -0.3135247349739075,
"signal/accuracy_reward/centered_abs_mean": 0.141259765625,
"signal/accuracy_reward/group_std_mean": 0.18782249391078948,
"signal/accuracy_reward/group_zero_std_frac": 0.45833333730697634,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9199034810066223,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0706298828125,
"signal/advantage_abs_mean": 0.7486230492591858,
"signal/advantage_pre_scale_abs_mean": 0.11478613466024398,
"signal/advantage_pre_scale_std": 0.19818316996097565,
"signal/advantage_std": 0.9831642985343934,
"signal/brier_reward/centered_abs_mean": 0.1412952125072479,
"signal/brier_reward/group_std_mean": 0.18475831747055055,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1854201763868332,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014129521884024143,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06423628926277161,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09473485499620438,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08357910513877868,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006423629261553288,
"signal/format_reward/centered_abs_mean": 0.05540364682674408,
"signal/format_reward/group_std_mean": 0.08460389599204063,
"signal/format_reward/group_zero_std_frac": 0.7111111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.3581722557544708,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.02770182341337204,
"signal/frontier_coverage_0/centered_abs_mean": 0.16017161309719086,
"signal/frontier_coverage_0/group_std_mean": 0.21179381310939788,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.029991919174790382,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022904541343450545,
"signal/frontier_coverage_1/centered_abs_mean": 0.16017161309719086,
"signal/frontier_coverage_1/group_std_mean": 0.21179381310939788,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.029991919174790382,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022904541343450545,
"signal/frontier_coverage_10/centered_abs_mean": 0.08997779488563537,
"signal/frontier_coverage_10/group_std_mean": 0.12086162716150284,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.016862975619733333,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012866824865341187,
"signal/frontier_coverage_15/centered_abs_mean": 0.06877997815608979,
"signal/frontier_coverage_15/group_std_mean": 0.08738072514533997,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013019220717251302,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009835536940954626,
"signal/frontier_coverage_20/centered_abs_mean": 0.09969817698001862,
"signal/frontier_coverage_20/group_std_mean": 0.1269403502345085,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018895361199975015,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014256839640438557,
"signal/frontier_coverage_25/centered_abs_mean": 0.13946891725063323,
"signal/frontier_coverage_25/group_std_mean": 0.1778358042240143,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.026414673030376434,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001994405430741608,
"signal/frontier_coverage_5/centered_abs_mean": 0.1596672624349594,
"signal/frontier_coverage_5/group_std_mean": 0.21115436553955078,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0298971451818943,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022832419257611037,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34117398262023924,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4078505575656891,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.451895147562027,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03411739841103554,
"step": 300
},
{
"epoch": 0.7199910001124986,
"eval_calibration/aurc": 0.14135630737203364,
"eval_calibration/batch_distribution_entropy": 0.8794302039147933,
"eval_calibration/buffer_distribution_entropy": 0.9816624202161491,
"eval_calibration/confidence_entropy": 0.47957698486476735,
"eval_calibration/coverage@0%": 0.26187275985663083,
"eval_calibration/coverage@1%": 0.26187275985663083,
"eval_calibration/coverage@10%": 0.4733982974910394,
"eval_calibration/coverage@15%": 0.5693436379928316,
"eval_calibration/coverage@20%": 0.8225312075145222,
"eval_calibration/coverage@25%": 0.9220059325176123,
"eval_calibration/coverage@30%": 0.9833333333333334,
"eval_calibration/coverage@5%": 0.27749775985663083,
"eval_calibration/ece": 0.2013177881287851,
"eval_calibration/mean_confidence": 0.639016019496972,
"eval_completions/clipped_ratio": 0.033854166666666685,
"eval_completions/max_length": 3558.6666666666665,
"eval_completions/max_terminated_length": 3558.6666666666665,
"eval_completions/mean_length": 1123.0069783528645,
"eval_completions/mean_terminated_length": 1162.5458374023438,
"eval_completions/min_length": 74.16666666666667,
"eval_completions/min_terminated_length": 340.0,
"eval_loss": 0.0,
"eval_num_tokens": 705193695.0,
"eval_reward": 0.8927736977736155,
"eval_reward_std": 0.290027916431427,
"eval_rewards/accuracy_reward": 0.668402781089147,
"eval_rewards/brier_reward": 0.7983585397402445,
"eval_rewards/confidence_uniqueness_reward": 0.8560836017131805,
"eval_rewards/format_reward": 0.9618055621782938,
"eval_rewards/frontier_coverage_0": 0.03550082134703795,
"eval_rewards/frontier_coverage_1": 0.03550082134703795,
"eval_rewards/frontier_coverage_10": 0.036372952007999025,
"eval_rewards/frontier_coverage_15": 0.07793260862429936,
"eval_rewards/frontier_coverage_20": 0.1448104108373324,
"eval_rewards/frontier_coverage_25": 0.22215457757314047,
"eval_rewards/frontier_coverage_5": 0.03555072944921752,
"eval_rewards/frontier_entropy_batch_reward": -0.9618055621782938,
"eval_runtime": 226.3973,
"eval_samples_per_second": 4.417,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4308810730775197,
"eval_signal/accuracy_reward/group_std_mean": 0.4704217165708542,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7596533397833506,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21544053653875986,
"eval_signal/advantage_abs_mean": 0.8504889905452728,
"eval_signal/advantage_pre_scale_abs_mean": 0.24683604389429092,
"eval_signal/advantage_pre_scale_std": 0.28865334888299304,
"eval_signal/advantage_std": 0.9864764511585236,
"eval_signal/brier_reward/centered_abs_mean": 0.20618696510791779,
"eval_signal/brier_reward/group_std_mean": 0.2769654293855031,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07253366460402806,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.020618697938819725,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.08746503914395969,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1652904860675335,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030453757693370182,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008746504162748655,
"eval_signal/format_reward/centered_abs_mean": 0.07161458333333333,
"eval_signal/format_reward/group_std_mean": 0.16531085719664892,
"eval_signal/format_reward/group_zero_std_frac": 0.22222222636143366,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.12363888944188754,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.035807291666666664,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.21314354240894318,
"eval_signal/frontier_coverage_0/group_std_mean": 0.3162529617547989,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01078090537339449,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00304795258368055,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.21314354240894318,
"eval_signal/frontier_coverage_1/group_std_mean": 0.3162529617547989,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01078090537339449,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00304795258368055,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.11475640162825584,
"eval_signal/frontier_coverage_10/group_std_mean": 0.17651239037513733,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005812383955344558,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016410165311147769,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.10023303826649983,
"eval_signal/frontier_coverage_15/group_std_mean": 0.12652035181721052,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005055272563671072,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014333324312853317,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.18804885198672613,
"eval_signal/frontier_coverage_20/group_std_mean": 0.23272972305615744,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00948300507540504,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002689098434833189,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2883477956056595,
"eval_signal/frontier_coverage_25/group_std_mean": 0.3521091441313426,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.014543836625913778,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004123373539187014,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.21239722271760306,
"eval_signal/frontier_coverage_5/group_std_mean": 0.31527013083299,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.010743242222815752,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030372802478571734,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07161458333333333,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.16531085719664892,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.22222222636143366,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024727776025732357,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0071614584885537624,
"eval_steps_per_second": 0.027,
"step": 300
},
{
"epoch": 0.7199910001124986,
"step": 300,
"train_probe_calibration/aurc": 0.10742759964817182,
"train_probe_calibration/batch_distribution_entropy": 0.8882509027982718,
"train_probe_calibration/buffer_distribution_entropy": 0.9818274202682408,
"train_probe_calibration/confidence_entropy": 0.46353220505448417,
"train_probe_calibration/coverage@0%": 0.41935483870967744,
"train_probe_calibration/coverage@1%": 0.41935483870967744,
"train_probe_calibration/coverage@10%": 0.5690524193548386,
"train_probe_calibration/coverage@15%": 0.6693548387096774,
"train_probe_calibration/coverage@20%": 0.8776881720430106,
"train_probe_calibration/coverage@25%": 0.9307795698924731,
"train_probe_calibration/coverage@30%": 0.9734543010752689,
"train_probe_calibration/coverage@5%": 0.47278225806451607,
"train_probe_calibration/ece": 0.1894395127688172,
"train_probe_calibration/mean_confidence": 0.6430341095430108,
"train_probe_completions/clipped_ratio": 0.03368055555555555,
"train_probe_completions/max_length": 3965.5,
"train_probe_completions/max_terminated_length": 3965.5,
"train_probe_completions/mean_length": 1158.6050821940105,
"train_probe_completions/mean_terminated_length": 1198.6514078776042,
"train_probe_completions/min_length": 0.0,
"train_probe_completions/min_terminated_length": 332.8333333333333,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 705193695.0,
"train_probe_reward": 0.920450339714686,
"train_probe_reward_std": 0.2729005167881648,
"train_probe_rewards/accuracy_reward": 0.7170138955116272,
"train_probe_rewards/brier_reward": 0.8147866725921631,
"train_probe_rewards/confidence_uniqueness_reward": 0.855155328909556,
"train_probe_rewards/format_reward": 0.9661458333333334,
"train_probe_rewards/frontier_coverage_0": 0.017018629354424775,
"train_probe_rewards/frontier_coverage_1": 0.017018629354424775,
"train_probe_rewards/frontier_coverage_10": 0.026363508426584303,
"train_probe_rewards/frontier_coverage_15": 0.08919420217474301,
"train_probe_rewards/frontier_coverage_20": 0.1686765750249227,
"train_probe_rewards/frontier_coverage_25": 0.2584116756916046,
"train_probe_rewards/frontier_coverage_5": 0.01708184430996577,
"train_probe_rewards/frontier_entropy_batch_reward": -0.9661458333333334,
"train_probe_runtime": 225.113,
"train_probe_samples_per_second": 4.442,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.3924696197112401,
"train_probe_signal/accuracy_reward/group_std_mean": 0.4475194712479909,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7479262252648672,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.19623480985562006,
"train_probe_signal/advantage_abs_mean": 0.8123607436815897,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.22337998201449713,
"train_probe_signal/advantage_pre_scale_std": 0.27356437345345813,
"train_probe_signal/advantage_std": 0.9864482978979746,
"train_probe_signal/brier_reward/centered_abs_mean": 0.1891830414533615,
"train_probe_signal/brier_reward/group_std_mean": 0.2585650583108266,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07193443675835927,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.018918303151925404,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0852524774769942,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.15537608787417412,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03227034925172726,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008525247918441892,
"train_probe_signal/format_reward/centered_abs_mean": 0.06331380208333333,
"train_probe_signal/format_reward/group_std_mean": 0.14678262422482172,
"train_probe_signal/format_reward/group_zero_std_frac": 0.305555559694767,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.11950643360614777,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.031656901041666664,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.2071551432212194,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.31829215089480084,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.011326478483776251,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002962318443072339,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2071551432212194,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.31829215089480084,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.011326478483776251,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002962318443072339,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.11016559849182765,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.17865454157193503,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006017673372601469,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015753680490888655,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.09809967502951622,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.12173208470145862,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005335134997343023,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014028252529290814,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.18012393762667975,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.2199820727109909,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.009804089398433765,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025757723099862537,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.27247088154157,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.33070384462674457,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.014833726920187473,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038963335876663527,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.2064915026227633,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.31740186115105945,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01129010800893108,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002952828382452329,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06331380208333333,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.14678262422482172,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.305555559694767,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023901287155846756,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.006331380379075806,
"train_probe_steps_per_second": 0.027
},
{
"calibration/aurc": 0.1552747603116913,
"calibration/batch_distribution_entropy": 0.9694031593763339,
"calibration/buffer_distribution_entropy": 0.981981676252941,
"calibration/confidence_entropy": 0.4865835211273901,
"calibration/coverage@0%": 0.07719467074264807,
"calibration/coverage@1%": 0.08500717074264807,
"calibration/coverage@10%": 0.41726822600335883,
"calibration/coverage@15%": 0.6069180800905718,
"calibration/coverage@20%": 0.7214083784261371,
"calibration/coverage@25%": 0.8297344820813896,
"calibration/coverage@30%": 0.8809117190489004,
"calibration/coverage@5%": 0.2596130214264611,
"calibration/ece": 0.12211821587917515,
"calibration/mean_confidence": 0.572359487035709,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.039843749999999976,
"completions/max_length": 4082.4,
"completions/max_terminated_length": 4082.4,
"completions/mean_length": 1209.8580322265625,
"completions/mean_terminated_length": 1260.188671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 287.8,
"epoch": 0.7319908501143736,
"grad_norm": 0.00178525282535702,
"learning_rate": 4.4170673076923085e-06,
"loss": -0.0993,
"num_tokens": 722246043.0,
"reward": 0.9593374133110046,
"reward_std": 0.1680666208267212,
"rewards/accuracy_reward": 0.6590277671813964,
"rewards/brier_reward": 0.7911885142326355,
"rewards/confidence_uniqueness_reward": 0.9110708355903625,
"rewards/format_reward": 0.9598958253860473,
"rewards/frontier_coverage_0": 0.03790535945445299,
"rewards/frontier_coverage_1": 0.03790535945445299,
"rewards/frontier_coverage_10": 0.03527109958231449,
"rewards/frontier_coverage_15": 0.07195997387170791,
"rewards/frontier_coverage_20": 0.13255858570337295,
"rewards/frontier_coverage_25": 0.20439787209033966,
"rewards/frontier_coverage_5": 0.037907212227582934,
"rewards/frontier_entropy_batch_reward": -0.28328379392623904,
"signal/accuracy_reward/centered_abs_mean": 0.17368706464767455,
"signal/accuracy_reward/group_std_mean": 0.2288749635219574,
"signal/accuracy_reward/group_zero_std_frac": 0.35277777910232544,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9983350157737731,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08684353232383728,
"signal/advantage_abs_mean": 0.7509189486503601,
"signal/advantage_pre_scale_abs_mean": 0.1267881840467453,
"signal/advantage_pre_scale_std": 0.20221469700336456,
"signal/advantage_std": 0.983331310749054,
"signal/brier_reward/centered_abs_mean": 0.1563771367073059,
"signal/brier_reward/group_std_mean": 0.19976865351200104,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17966135144233703,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015637714229524136,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06652242168784142,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09470682889223099,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.07581292390823365,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006652241852134466,
"signal/format_reward/centered_abs_mean": 0.05766059011220932,
"signal/format_reward/group_std_mean": 0.08429588973522187,
"signal/format_reward/group_zero_std_frac": 0.7277777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.32826632261276245,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.02883029505610466,
"signal/frontier_coverage_0/centered_abs_mean": 0.17973176538944244,
"signal/frontier_coverage_0/group_std_mean": 0.23481388986110688,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.029676606878638268,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002570164296776056,
"signal/frontier_coverage_1/centered_abs_mean": 0.17973176538944244,
"signal/frontier_coverage_1/group_std_mean": 0.23481388986110688,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.029676606878638268,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002570164296776056,
"signal/frontier_coverage_10/centered_abs_mean": 0.1035152941942215,
"signal/frontier_coverage_10/group_std_mean": 0.13700433671474457,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017153031565248965,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014802686870098113,
"signal/frontier_coverage_15/centered_abs_mean": 0.07050866931676865,
"signal/frontier_coverage_15/group_std_mean": 0.08939850181341172,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011573206260800361,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010082739987410604,
"signal/frontier_coverage_20/centered_abs_mean": 0.10300731658935547,
"signal/frontier_coverage_20/group_std_mean": 0.13183286190032958,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016876140236854555,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00147300458047539,
"signal/frontier_coverage_25/centered_abs_mean": 0.14747728109359742,
"signal/frontier_coverage_25/group_std_mean": 0.1892870306968689,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02416311949491501,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021089251386001706,
"signal/frontier_coverage_5/centered_abs_mean": 0.17917352616786958,
"signal/frontier_coverage_5/group_std_mean": 0.2341161251068115,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02958527356386185,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025621813256293535,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3193290412425995,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38982054591178894,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3665937602519989,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03193290457129479,
"step": 305
},
{
"calibration/aurc": 0.1301794676312849,
"calibration/batch_distribution_entropy": 0.9625952686521343,
"calibration/buffer_distribution_entropy": 0.9821861615896477,
"calibration/confidence_entropy": 0.49521016438445387,
"calibration/coverage@0%": 0.03926732235685257,
"calibration/coverage@1%": 0.03926732235685257,
"calibration/coverage@10%": 0.5030335020335958,
"calibration/coverage@15%": 0.6679785970132144,
"calibration/coverage@20%": 0.8117728337153588,
"calibration/coverage@25%": 0.896987333105101,
"calibration/coverage@30%": 0.9451086956521738,
"calibration/coverage@5%": 0.28356458982235416,
"calibration/ece": 0.18017739107118977,
"calibration/mean_confidence": 0.5452775766771263,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011197916666666651,
"completions/max_length": 4044.2,
"completions/max_terminated_length": 4044.2,
"completions/mean_length": 1101.816162109375,
"completions/mean_terminated_length": 1114.2768798828124,
"completions/min_length": 0.0,
"completions/min_terminated_length": 294.0,
"epoch": 0.7439907001162486,
"grad_norm": 0.002012076321989298,
"learning_rate": 4.3870192307692315e-06,
"loss": -0.0239,
"num_tokens": 738024053.0,
"reward": 0.9944738268852233,
"reward_std": 0.131025093793869,
"rewards/accuracy_reward": 0.6964409589767456,
"rewards/brier_reward": 0.8037665963172913,
"rewards/confidence_uniqueness_reward": 0.9392407178878784,
"rewards/format_reward": 0.9881944417953491,
"rewards/frontier_coverage_0": 0.01375200878828764,
"rewards/frontier_coverage_1": 0.01375200878828764,
"rewards/frontier_coverage_10": 0.026098747923970222,
"rewards/frontier_coverage_15": 0.06501503065228462,
"rewards/frontier_coverage_20": 0.12437251508235932,
"rewards/frontier_coverage_25": 0.1967363566160202,
"rewards/frontier_coverage_5": 0.01382654495537281,
"rewards/frontier_entropy_batch_reward": -0.28630446195602416,
"signal/accuracy_reward/centered_abs_mean": 0.1478461354970932,
"signal/accuracy_reward/group_std_mean": 0.19731209874153138,
"signal/accuracy_reward/group_zero_std_frac": 0.42777777910232545,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9674266099929809,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0739230677485466,
"signal/advantage_abs_mean": 0.7382626414299012,
"signal/advantage_pre_scale_abs_mean": 0.09559480696916581,
"signal/advantage_pre_scale_std": 0.1574586659669876,
"signal/advantage_std": 0.9831644654273987,
"signal/brier_reward/centered_abs_mean": 0.12786187827587128,
"signal/brier_reward/group_std_mean": 0.16800876557826996,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16815095245838166,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01278618685901165,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031389427930116655,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05517948716878891,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.041796249151229856,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031389428302645685,
"signal/format_reward/centered_abs_mean": 0.02082248255610466,
"signal/format_reward/group_std_mean": 0.04276073575019836,
"signal/format_reward/group_zero_std_frac": 0.8111111283302307,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1391352728009224,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01041124127805233,
"signal/frontier_coverage_0/centered_abs_mean": 0.17754314541816713,
"signal/frontier_coverage_0/group_std_mean": 0.2308153033256531,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.033347847312688826,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002538866922259331,
"signal/frontier_coverage_1/centered_abs_mean": 0.17754314541816713,
"signal/frontier_coverage_1/group_std_mean": 0.2308153033256531,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.033347847312688826,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002538866922259331,
"signal/frontier_coverage_10/centered_abs_mean": 0.09596252888441086,
"signal/frontier_coverage_10/group_std_mean": 0.12769888788461686,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01809079311788082,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013722641393542289,
"signal/frontier_coverage_15/centered_abs_mean": 0.06407084167003632,
"signal/frontier_coverage_15/group_std_mean": 0.08187362253665924,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012147468142211437,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009162130183540285,
"signal/frontier_coverage_20/centered_abs_mean": 0.09221599698066711,
"signal/frontier_coverage_20/group_std_mean": 0.11842593848705292,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01745458468794823,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013186887372285128,
"signal/frontier_coverage_25/centered_abs_mean": 0.13106826245784758,
"signal/frontier_coverage_25/group_std_mean": 0.168903848528862,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.024775386229157446,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018742761574685573,
"signal/frontier_coverage_5/centered_abs_mean": 0.17703687250614167,
"signal/frontier_coverage_5/group_std_mean": 0.23018013834953308,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03325313590466976,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002531627379357815,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3299850106239319,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3956158757209778,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4360787570476532,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03299850150942803,
"step": 310
},
{
"calibration/aurc": 0.1366955880628234,
"calibration/batch_distribution_entropy": 0.9618162442123754,
"calibration/buffer_distribution_entropy": 0.9831649324424259,
"calibration/confidence_entropy": 0.46992879118155806,
"calibration/coverage@0%": 0.03888815794752962,
"calibration/coverage@1%": 0.07458369600527241,
"calibration/coverage@10%": 0.5571547123475279,
"calibration/coverage@15%": 0.6430476811145726,
"calibration/coverage@20%": 0.689326318413781,
"calibration/coverage@25%": 0.8121137739326814,
"calibration/coverage@30%": 0.948553667633651,
"calibration/coverage@5%": 0.29792946199505,
"calibration/ece": 0.17161861650452742,
"calibration/mean_confidence": 0.5649337775994152,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010243055555555557,
"completions/max_length": 4013.2,
"completions/max_terminated_length": 4013.2,
"completions/mean_length": 1085.9829833984375,
"completions/mean_terminated_length": 1097.5322021484376,
"completions/min_length": 0.0,
"completions/min_terminated_length": 277.8,
"epoch": 0.7559905501181235,
"grad_norm": 0.0018735633930191398,
"learning_rate": 4.356971153846154e-06,
"loss": -0.0221,
"num_tokens": 753646737.0,
"reward": 0.9998907804489136,
"reward_std": 0.12072976231575012,
"rewards/accuracy_reward": 0.70703125,
"rewards/brier_reward": 0.8256863355636597,
"rewards/confidence_uniqueness_reward": 0.9371930837631226,
"rewards/format_reward": 0.9894965410232544,
"rewards/frontier_coverage_0": 0.031487956270575525,
"rewards/frontier_coverage_1": 0.031487956270575525,
"rewards/frontier_coverage_10": 0.03752702244091779,
"rewards/frontier_coverage_15": 0.0800102636218071,
"rewards/frontier_coverage_20": 0.15115441083908082,
"rewards/frontier_coverage_25": 0.23473278284072877,
"rewards/frontier_coverage_5": 0.03159824721515179,
"rewards/frontier_entropy_batch_reward": -0.33212465047836304,
"signal/accuracy_reward/centered_abs_mean": 0.1304741770029068,
"signal/accuracy_reward/group_std_mean": 0.17876172661781312,
"signal/accuracy_reward/group_zero_std_frac": 0.4666666746139526,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9496649146080017,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0652370885014534,
"signal/advantage_abs_mean": 0.7483431100845337,
"signal/advantage_pre_scale_abs_mean": 0.08890924602746964,
"signal/advantage_pre_scale_std": 0.149673992395401,
"signal/advantage_std": 0.9830163478851318,
"signal/brier_reward/centered_abs_mean": 0.12116939425468445,
"signal/brier_reward/group_std_mean": 0.15918799936771394,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17770620584487914,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012116939388215541,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030384134128689767,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04616203308105469,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.044592789560556415,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003038413543254137,
"signal/format_reward/centered_abs_mean": 0.016954209841787814,
"signal/format_reward/group_std_mean": 0.02980217821896076,
"signal/format_reward/group_zero_std_frac": 0.8833333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.12439082860946656,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008477104920893907,
"signal/frontier_coverage_0/centered_abs_mean": 0.1542992562055588,
"signal/frontier_coverage_0/group_std_mean": 0.205556982755661,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.032318027690052986,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022064793156459928,
"signal/frontier_coverage_1/centered_abs_mean": 0.1542992562055588,
"signal/frontier_coverage_1/group_std_mean": 0.205556982755661,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.032318027690052986,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022064793156459928,
"signal/frontier_coverage_10/centered_abs_mean": 0.08488886803388596,
"signal/frontier_coverage_10/group_std_mean": 0.11515198647975922,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017752321809530257,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012139108264818787,
"signal/frontier_coverage_15/centered_abs_mean": 0.06964522302150726,
"signal/frontier_coverage_15/group_std_mean": 0.08731711953878403,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014650024473667145,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009959267335943878,
"signal/frontier_coverage_20/centered_abs_mean": 0.10206463634967804,
"signal/frontier_coverage_20/group_std_mean": 0.12885676175355912,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021465276554226875,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001459524338133633,
"signal/frontier_coverage_25/centered_abs_mean": 0.14236523509025573,
"signal/frontier_coverage_25/group_std_mean": 0.1811072200536728,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029910705611109735,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002035822859033942,
"signal/frontier_coverage_5/centered_abs_mean": 0.1539049506187439,
"signal/frontier_coverage_5/group_std_mean": 0.2050553798675537,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03223489001393318,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022008407860994337,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34531130194664,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4141314446926117,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5073081076145172,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03453113064169884,
"step": 315
},
{
"calibration/aurc": 0.1532792820022398,
"calibration/batch_distribution_entropy": 0.9765106952542615,
"calibration/buffer_distribution_entropy": 0.9830529412034188,
"calibration/confidence_entropy": 0.48771022732878855,
"calibration/coverage@0%": 0.0503317881131927,
"calibration/coverage@1%": 0.0503317881131927,
"calibration/coverage@10%": 0.36972495473163974,
"calibration/coverage@15%": 0.5327097308207017,
"calibration/coverage@20%": 0.7291624390348835,
"calibration/coverage@25%": 0.8552638694054415,
"calibration/coverage@30%": 0.9451060675235279,
"calibration/coverage@5%": 0.10596526819151841,
"calibration/ece": 0.1575374552154884,
"calibration/mean_confidence": 0.5512514130689602,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013454861111111115,
"completions/max_length": 4019.6,
"completions/max_terminated_length": 4019.6,
"completions/mean_length": 1104.9857666015625,
"completions/mean_terminated_length": 1119.9865234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 297.4,
"epoch": 0.7679904001199985,
"grad_norm": 0.002138911047950387,
"learning_rate": 4.326923076923077e-06,
"loss": -0.0288,
"num_tokens": 769469421.0,
"reward": 0.9860237836837769,
"reward_std": 0.12763068974018096,
"rewards/accuracy_reward": 0.6813367962837219,
"rewards/brier_reward": 0.8128754019737243,
"rewards/confidence_uniqueness_reward": 0.9359204173088074,
"rewards/format_reward": 0.9860242962837219,
"rewards/frontier_coverage_0": 0.03168969838880002,
"rewards/frontier_coverage_1": 0.03168969838880002,
"rewards/frontier_coverage_10": 0.03532592952251434,
"rewards/frontier_coverage_15": 0.07133440673351288,
"rewards/frontier_coverage_20": 0.13430507332086564,
"rewards/frontier_coverage_25": 0.20982736349105835,
"rewards/frontier_coverage_5": 0.03171238908544183,
"rewards/frontier_entropy_batch_reward": -0.3034252643585205,
"signal/accuracy_reward/centered_abs_mean": 0.13915473371744155,
"signal/accuracy_reward/group_std_mean": 0.1901726096868515,
"signal/accuracy_reward/group_zero_std_frac": 0.4361111164093018,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.949447751045227,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06957736685872078,
"signal/advantage_abs_mean": 0.7514520049095154,
"signal/advantage_pre_scale_abs_mean": 0.09422855377197266,
"signal/advantage_pre_scale_std": 0.1557147890329361,
"signal/advantage_std": 0.9831169128417969,
"signal/brier_reward/centered_abs_mean": 0.1259630024433136,
"signal/brier_reward/group_std_mean": 0.16163697242736816,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1725175768136978,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012596299685537816,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03393393531441689,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05093328282237053,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04678136818110943,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033933936385437844,
"signal/format_reward/centered_abs_mean": 0.02191297700628638,
"signal/format_reward/group_std_mean": 0.03627087995409965,
"signal/format_reward/group_zero_std_frac": 0.8666666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1513817459344864,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01095648850314319,
"signal/frontier_coverage_0/centered_abs_mean": 0.1669444590806961,
"signal/frontier_coverage_0/group_std_mean": 0.21839172542095184,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03264324963092804,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023873056750744583,
"signal/frontier_coverage_1/centered_abs_mean": 0.1669444590806961,
"signal/frontier_coverage_1/group_std_mean": 0.21839172542095184,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03264324963092804,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023873056750744583,
"signal/frontier_coverage_10/centered_abs_mean": 0.0943936437368393,
"signal/frontier_coverage_10/group_std_mean": 0.1261305809020996,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.018443511798977852,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013498291140422226,
"signal/frontier_coverage_15/centered_abs_mean": 0.06663856953382492,
"signal/frontier_coverage_15/group_std_mean": 0.08290167152881622,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013056344538927078,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009529315866529942,
"signal/frontier_coverage_20/centered_abs_mean": 0.09651756435632705,
"signal/frontier_coverage_20/group_std_mean": 0.12127373814582824,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018911157548427582,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001380201173014939,
"signal/frontier_coverage_25/centered_abs_mean": 0.13597081303596498,
"signal/frontier_coverage_25/group_std_mean": 0.17202906012535096,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.026631006225943565,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019443826517090202,
"signal/frontier_coverage_5/centered_abs_mean": 0.16659797430038453,
"signal/frontier_coverage_5/group_std_mean": 0.21796338260173798,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03257517628371716,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002382350992411375,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33233543038368224,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3965053200721741,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.45477959513664246,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03323354199528694,
"step": 320
},
{
"calibration/aurc": 0.15820918899673367,
"calibration/batch_distribution_entropy": 0.9448516087091381,
"calibration/buffer_distribution_entropy": 0.9834185299134219,
"calibration/confidence_entropy": 0.5218543693267863,
"calibration/coverage@0%": 0.009955946485114536,
"calibration/coverage@1%": 0.009955946485114536,
"calibration/coverage@10%": 0.29899489168223903,
"calibration/coverage@15%": 0.5591289854743147,
"calibration/coverage@20%": 0.8303993936284947,
"calibration/coverage@25%": 0.9335853621861032,
"calibration/coverage@30%": 0.9718015665796343,
"calibration/coverage@5%": 0.0970347483189584,
"calibration/ece": 0.17910890874457702,
"calibration/mean_confidence": 0.6065439761899268,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008854166666666673,
"completions/max_length": 4060.0,
"completions/max_terminated_length": 4060.0,
"completions/mean_length": 1113.4197265625,
"completions/mean_terminated_length": 1123.4944091796874,
"completions/min_length": 0.0,
"completions/min_terminated_length": 248.2,
"epoch": 0.7799902501218735,
"grad_norm": 0.001976795494556427,
"learning_rate": 4.296875e-06,
"loss": -0.0172,
"num_tokens": 785408880.0,
"reward": 1.0012445449829102,
"reward_std": 0.12599269896745682,
"rewards/accuracy_reward": 0.7080729246139527,
"rewards/brier_reward": 0.8102705597877502,
"rewards/confidence_uniqueness_reward": 0.9413081288337708,
"rewards/format_reward": 0.9911458373069764,
"rewards/frontier_coverage_0": 0.004954400286078453,
"rewards/frontier_coverage_1": 0.004954400286078453,
"rewards/frontier_coverage_10": 0.01719066435471177,
"rewards/frontier_coverage_15": 0.06398663446307182,
"rewards/frontier_coverage_20": 0.12680090814828873,
"rewards/frontier_coverage_25": 0.20377641618251802,
"rewards/frontier_coverage_5": 0.005010297335684299,
"rewards/frontier_entropy_batch_reward": -0.2962413549423218,
"signal/accuracy_reward/centered_abs_mean": 0.15307616889476777,
"signal/accuracy_reward/group_std_mean": 0.19774922728538513,
"signal/accuracy_reward/group_zero_std_frac": 0.44444444179534914,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0481387495994567,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07653808444738389,
"signal/advantage_abs_mean": 0.7635510325431824,
"signal/advantage_pre_scale_abs_mean": 0.09697668254375458,
"signal/advantage_pre_scale_std": 0.15258235931396485,
"signal/advantage_std": 0.9831050872802735,
"signal/brier_reward/centered_abs_mean": 0.12253386676311492,
"signal/brier_reward/group_std_mean": 0.15739177763462067,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16855136752128602,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012253387458622455,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026244480162858963,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03955877721309662,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03617042452096939,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026244479697197674,
"signal/format_reward/centered_abs_mean": 0.013921440858393908,
"signal/format_reward/group_std_mean": 0.02432932294905186,
"signal/format_reward/group_zero_std_frac": 0.9055555582046508,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09468984603881836,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006960720429196954,
"signal/frontier_coverage_0/centered_abs_mean": 0.16732266545295715,
"signal/frontier_coverage_0/group_std_mean": 0.21497032344341277,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03277806714177132,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002392714167945087,
"signal/frontier_coverage_1/centered_abs_mean": 0.16732266545295715,
"signal/frontier_coverage_1/group_std_mean": 0.21497032344341277,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03277806714177132,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002392714167945087,
"signal/frontier_coverage_10/centered_abs_mean": 0.09474649280309677,
"signal/frontier_coverage_10/group_std_mean": 0.12416742146015167,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01850355453789234,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013548748102039099,
"signal/frontier_coverage_15/centered_abs_mean": 0.06477891355752945,
"signal/frontier_coverage_15/group_std_mean": 0.08186470121145248,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012831047736108303,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009263384621590376,
"signal/frontier_coverage_20/centered_abs_mean": 0.09724516570568084,
"signal/frontier_coverage_20/group_std_mean": 0.123662668466568,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019288834184408188,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013906059321016073,
"signal/frontier_coverage_25/centered_abs_mean": 0.1411896228790283,
"signal/frontier_coverage_25/group_std_mean": 0.17907672822475434,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02798592709004879,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002019011718221009,
"signal/frontier_coverage_5/centered_abs_mean": 0.16672602891921998,
"signal/frontier_coverage_5/group_std_mean": 0.21423827409744262,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032660551741719245,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023841822519898416,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3291071951389313,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3969504415988922,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4558913826942444,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03291071951389313,
"step": 325
},
{
"calibration/aurc": 0.156909006067032,
"calibration/batch_distribution_entropy": 0.9543305090526145,
"calibration/buffer_distribution_entropy": 0.9825246543342956,
"calibration/confidence_entropy": 0.5014532464678767,
"calibration/coverage@0%": 0.03240064965769619,
"calibration/coverage@1%": 0.03240064965769619,
"calibration/coverage@10%": 0.3325626616081171,
"calibration/coverage@15%": 0.5786603371706475,
"calibration/coverage@20%": 0.8025542499093617,
"calibration/coverage@25%": 0.9022413212273618,
"calibration/coverage@30%": 0.9383812010443865,
"calibration/coverage@5%": 0.1835917703498551,
"calibration/ece": 0.1576187415510833,
"calibration/mean_confidence": 0.60028896248539,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008072916666666652,
"completions/max_length": 4059.2,
"completions/max_terminated_length": 4059.2,
"completions/mean_length": 1062.9158813476563,
"completions/mean_terminated_length": 1071.6300903320312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 268.0,
"epoch": 0.7919901001237485,
"grad_norm": 0.0017952125053852797,
"learning_rate": 4.266826923076923e-06,
"loss": -0.0144,
"num_tokens": 800758663.0,
"reward": 1.0048826813697815,
"reward_std": 0.12268615663051605,
"rewards/accuracy_reward": 0.7028645753860474,
"rewards/brier_reward": 0.8274922251701355,
"rewards/confidence_uniqueness_reward": 0.9430109739303589,
"rewards/format_reward": 0.9919270753860474,
"rewards/frontier_coverage_0": 0.03261248916387558,
"rewards/frontier_coverage_1": 0.03261248916387558,
"rewards/frontier_coverage_10": 0.04394304975867271,
"rewards/frontier_coverage_15": 0.07767296582460403,
"rewards/frontier_coverage_20": 0.1443429633975029,
"rewards/frontier_coverage_25": 0.22653323411941528,
"rewards/frontier_coverage_5": 0.03271240890026093,
"rewards/frontier_entropy_batch_reward": -0.2800663381814957,
"signal/accuracy_reward/centered_abs_mean": 0.14945203959941863,
"signal/accuracy_reward/group_std_mean": 0.19367357790470124,
"signal/accuracy_reward/group_zero_std_frac": 0.4638888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0694059491157533,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07472601979970932,
"signal/advantage_abs_mean": 0.7655353307723999,
"signal/advantage_pre_scale_abs_mean": 0.0933989018201828,
"signal/advantage_pre_scale_std": 0.14949961602687836,
"signal/advantage_std": 0.9830429792404175,
"signal/brier_reward/centered_abs_mean": 0.11935856491327286,
"signal/brier_reward/group_std_mean": 0.15586462020874023,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17148438096046448,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011935856752097606,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02573820427060127,
"signal/confidence_uniqueness_reward/group_std_mean": 0.041389158368110655,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.037190504372119904,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002573820622637868,
"signal/format_reward/centered_abs_mean": 0.01385091133415699,
"signal/format_reward/group_std_mean": 0.026973145455121993,
"signal/format_reward/group_zero_std_frac": 0.8861111044883728,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09901182055473327,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006925455667078495,
"signal/frontier_coverage_0/centered_abs_mean": 0.17266753017902375,
"signal/frontier_coverage_0/group_std_mean": 0.22434581220149993,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03531235456466675,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024691457394510506,
"signal/frontier_coverage_1/centered_abs_mean": 0.17266753017902375,
"signal/frontier_coverage_1/group_std_mean": 0.22434581220149993,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03531235456466675,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024691457394510506,
"signal/frontier_coverage_10/centered_abs_mean": 0.09966813772916794,
"signal/frontier_coverage_10/group_std_mean": 0.13157041370868683,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.020390734449028967,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014252542983740567,
"signal/frontier_coverage_15/centered_abs_mean": 0.06750372946262359,
"signal/frontier_coverage_15/group_std_mean": 0.08493886291980743,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013990617357194424,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009653033805079759,
"signal/frontier_coverage_20/centered_abs_mean": 0.09670436531305313,
"signal/frontier_coverage_20/group_std_mean": 0.1224692091345787,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02009350135922432,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013828724855557083,
"signal/frontier_coverage_25/centered_abs_mean": 0.13782588988542557,
"signal/frontier_coverage_25/group_std_mean": 0.1750126987695694,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028620368614792824,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001970910234376788,
"signal/frontier_coverage_5/centered_abs_mean": 0.1721877634525299,
"signal/frontier_coverage_5/group_std_mean": 0.22375437915325164,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03521372601389885,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024622850120067596,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32352485656738283,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38960899114608766,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.46970110535621645,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03235248811542988,
"step": 330
},
{
"calibration/aurc": 0.25974679160997044,
"calibration/batch_distribution_entropy": 0.9760155955492031,
"calibration/buffer_distribution_entropy": 0.9828829099511015,
"calibration/confidence_entropy": 0.48294149594195723,
"calibration/coverage@0%": 0.024704542613912535,
"calibration/coverage@1%": 0.024704542613912535,
"calibration/coverage@10%": 0.16822392267418446,
"calibration/coverage@15%": 0.23947779930752344,
"calibration/coverage@20%": 0.30226532800539135,
"calibration/coverage@25%": 0.4429456127437903,
"calibration/coverage@30%": 0.6829346276603824,
"calibration/coverage@5%": 0.07391920229977642,
"calibration/ece": 0.17664347223114546,
"calibration/mean_confidence": 0.5079064995383955,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008246527777777768,
"completions/max_length": 4031.8,
"completions/max_terminated_length": 4031.8,
"completions/mean_length": 1115.892529296875,
"completions/mean_terminated_length": 1125.290966796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 305.8,
"epoch": 0.8039899501256235,
"grad_norm": 0.0018948515644297004,
"learning_rate": 4.236778846153847e-06,
"loss": -0.0163,
"num_tokens": 816738929.0,
"reward": 0.9759376645088196,
"reward_std": 0.12409429550170899,
"rewards/accuracy_reward": 0.6448784708976746,
"rewards/brier_reward": 0.8030743598937988,
"rewards/confidence_uniqueness_reward": 0.9439346671104432,
"rewards/format_reward": 0.9916666507720947,
"rewards/frontier_coverage_0": 0.058890349417924884,
"rewards/frontier_coverage_1": 0.058890349417924884,
"rewards/frontier_coverage_10": 0.05147294811904431,
"rewards/frontier_coverage_15": 0.06580731272697449,
"rewards/frontier_coverage_20": 0.11382190585136413,
"rewards/frontier_coverage_25": 0.17733904123306274,
"rewards/frontier_coverage_5": 0.05892799347639084,
"rewards/frontier_entropy_batch_reward": -0.25403423607349396,
"signal/accuracy_reward/centered_abs_mean": 0.15788303017616273,
"signal/accuracy_reward/group_std_mean": 0.20750285685062408,
"signal/accuracy_reward/group_zero_std_frac": 0.4083333432674408,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0875071048736573,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07894151508808137,
"signal/advantage_abs_mean": 0.7426583290100097,
"signal/advantage_pre_scale_abs_mean": 0.09224161058664322,
"signal/advantage_pre_scale_std": 0.14779528975486755,
"signal/advantage_std": 0.9830945491790771,
"signal/brier_reward/centered_abs_mean": 0.13514640033245087,
"signal/brier_reward/group_std_mean": 0.17495015859603882,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1872038722038269,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013514639995992183,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024792130663990976,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04206196665763855,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03454387187957764,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002479213196784258,
"signal/format_reward/centered_abs_mean": 0.01342230886220932,
"signal/format_reward/group_std_mean": 0.028304946422576905,
"signal/format_reward/group_zero_std_frac": 0.8722222208976745,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09320384189486504,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00671115443110466,
"signal/frontier_coverage_0/centered_abs_mean": 0.20540903508663177,
"signal/frontier_coverage_0/group_std_mean": 0.2632716208696365,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04065249636769295,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029373492114245893,
"signal/frontier_coverage_1/centered_abs_mean": 0.20540903508663177,
"signal/frontier_coverage_1/group_std_mean": 0.2632716208696365,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04065249636769295,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029373492114245893,
"signal/frontier_coverage_10/centered_abs_mean": 0.11642617136240005,
"signal/frontier_coverage_10/group_std_mean": 0.15153419971466064,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02303205505013466,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016648941906169056,
"signal/frontier_coverage_15/centered_abs_mean": 0.06793319284915925,
"signal/frontier_coverage_15/group_std_mean": 0.08559278100728988,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01348678469657898,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009714446961879731,
"signal/frontier_coverage_20/centered_abs_mean": 0.0874357521533966,
"signal/frontier_coverage_20/group_std_mean": 0.11122221052646637,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017344103008508683,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012503312435001134,
"signal/frontier_coverage_25/centered_abs_mean": 0.12143271416425705,
"signal/frontier_coverage_25/group_std_mean": 0.1560031145811081,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02407735027372837,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017364878440275788,
"signal/frontier_coverage_5/centered_abs_mean": 0.20495485663414,
"signal/frontier_coverage_5/group_std_mean": 0.26270574927330015,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04056171998381615,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002930854447185993,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3120520055294037,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38307093977928164,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.43333314061164857,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031205202266573907,
"step": 335
},
{
"calibration/aurc": 0.1777603587436345,
"calibration/batch_distribution_entropy": 0.9676360482311539,
"calibration/buffer_distribution_entropy": 0.9837100346462169,
"calibration/confidence_entropy": 0.48384860471380947,
"calibration/coverage@0%": 0.05912567353597281,
"calibration/coverage@1%": 0.07817329258359186,
"calibration/coverage@10%": 0.3694769900916339,
"calibration/coverage@15%": 0.551472741440538,
"calibration/coverage@20%": 0.6478955782906988,
"calibration/coverage@25%": 0.7625764361126298,
"calibration/coverage@30%": 0.8157900272755771,
"calibration/coverage@5%": 0.19237191228849984,
"calibration/ece": 0.12225110228153846,
"calibration/mean_confidence": 0.5703271550941463,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008159722222222231,
"completions/max_length": 4018.6,
"completions/max_terminated_length": 4018.6,
"completions/mean_length": 1021.0284057617188,
"completions/mean_terminated_length": 1029.4432495117187,
"completions/min_length": 0.0,
"completions/min_terminated_length": 259.0,
"epoch": 0.8159898001274984,
"grad_norm": 0.00172898406162858,
"learning_rate": 4.20673076923077e-06,
"loss": -0.0258,
"num_tokens": 831595448.0,
"reward": 0.986263906955719,
"reward_std": 0.12218181192874908,
"rewards/accuracy_reward": 0.6758680582046509,
"rewards/brier_reward": 0.8213988423347474,
"rewards/confidence_uniqueness_reward": 0.9391404390335083,
"rewards/format_reward": 0.9915798544883728,
"rewards/frontier_coverage_0": 0.0459395432844758,
"rewards/frontier_coverage_1": 0.0459395432844758,
"rewards/frontier_coverage_10": 0.04152496140450239,
"rewards/frontier_coverage_15": 0.07311205416917801,
"rewards/frontier_coverage_20": 0.13377267271280288,
"rewards/frontier_coverage_25": 0.20741928815841676,
"rewards/frontier_coverage_5": 0.045927997678518295,
"rewards/frontier_entropy_batch_reward": -0.3200296819210052,
"signal/accuracy_reward/centered_abs_mean": 0.13541666567325591,
"signal/accuracy_reward/group_std_mean": 0.18041078448295594,
"signal/accuracy_reward/group_zero_std_frac": 0.4750000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.963217580318451,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06770833283662796,
"signal/advantage_abs_mean": 0.7463102102279663,
"signal/advantage_pre_scale_abs_mean": 0.09043871462345124,
"signal/advantage_pre_scale_std": 0.14913626313209533,
"signal/advantage_std": 0.9830648899078369,
"signal/brier_reward/centered_abs_mean": 0.12320598512887955,
"signal/brier_reward/group_std_mean": 0.16074151992797853,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17543665766716005,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01232059821486473,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02815811224281788,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04658014327287674,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04017215184867382,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028158111963421106,
"signal/format_reward/centered_abs_mean": 0.01487087681889534,
"signal/format_reward/group_std_mean": 0.03009483590722084,
"signal/format_reward/group_zero_std_frac": 0.8694444417953491,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10635680183768273,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00743543840944767,
"signal/frontier_coverage_0/centered_abs_mean": 0.15372433960437776,
"signal/frontier_coverage_0/group_std_mean": 0.2000808149576187,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03130386024713516,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021982579957693813,
"signal/frontier_coverage_1/centered_abs_mean": 0.15372433960437776,
"signal/frontier_coverage_1/group_std_mean": 0.2000808149576187,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03130386024713516,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021982579957693813,
"signal/frontier_coverage_10/centered_abs_mean": 0.0841196671128273,
"signal/frontier_coverage_10/group_std_mean": 0.11117487698793412,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017140276730060577,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012029112316668034,
"signal/frontier_coverage_15/centered_abs_mean": 0.06903370916843414,
"signal/frontier_coverage_15/group_std_mean": 0.08654794842004776,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01405095923691988,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009871820569969715,
"signal/frontier_coverage_20/centered_abs_mean": 0.10134778022766114,
"signal/frontier_coverage_20/group_std_mean": 0.12806420773267746,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02063525579869747,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014492732472717762,
"signal/frontier_coverage_25/centered_abs_mean": 0.14322546422481536,
"signal/frontier_coverage_25/group_std_mean": 0.18165581822395324,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029165779426693916,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020481240935623646,
"signal/frontier_coverage_5/centered_abs_mean": 0.15351369380950927,
"signal/frontier_coverage_5/group_std_mean": 0.19981254935264586,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03126128278672695,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002195245958864689,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3323663532733917,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3995789408683777,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4719915151596069,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033236635476350786,
"step": 340
},
{
"calibration/aurc": 0.12287906036682998,
"calibration/batch_distribution_entropy": 0.9814748056693892,
"calibration/buffer_distribution_entropy": 0.9854357775100036,
"calibration/confidence_entropy": 0.5071326681710258,
"calibration/coverage@0%": 0.09825680183278489,
"calibration/coverage@1%": 0.10783126991789127,
"calibration/coverage@10%": 0.487797947842016,
"calibration/coverage@15%": 0.687217223910841,
"calibration/coverage@20%": 0.7937702159486942,
"calibration/coverage@25%": 0.8835173146188409,
"calibration/coverage@30%": 0.9581151832460734,
"calibration/coverage@5%": 0.2910469628144352,
"calibration/ece": 0.23547472343686363,
"calibration/mean_confidence": 0.49689369470204864,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011805555555555536,
"completions/max_length": 4073.0,
"completions/max_terminated_length": 4073.0,
"completions/mean_length": 1078.1122436523438,
"completions/mean_terminated_length": 1091.057568359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 258.0,
"epoch": 0.8279896501293734,
"grad_norm": 0.0016360305016860366,
"learning_rate": 4.176682692307693e-06,
"loss": -0.0312,
"num_tokens": 847106533.0,
"reward": 0.9822489023208618,
"reward_std": 0.12897055447101594,
"rewards/accuracy_reward": 0.6663194537162781,
"rewards/brier_reward": 0.7981275916099548,
"rewards/confidence_uniqueness_reward": 0.9408317327499389,
"rewards/format_reward": 0.9881944417953491,
"rewards/frontier_coverage_0": 0.029704060405492783,
"rewards/frontier_coverage_1": 0.029704060405492783,
"rewards/frontier_coverage_10": 0.03771770279854536,
"rewards/frontier_coverage_15": 0.06011849418282509,
"rewards/frontier_coverage_20": 0.1106926903128624,
"rewards/frontier_coverage_25": 0.17512863874435425,
"rewards/frontier_coverage_5": 0.029685120284557342,
"rewards/frontier_entropy_batch_reward": -0.25664323568344116,
"signal/accuracy_reward/centered_abs_mean": 0.1462131053209305,
"signal/accuracy_reward/group_std_mean": 0.19600538313388824,
"signal/accuracy_reward/group_zero_std_frac": 0.4333333373069763,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9670246005058288,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07310655266046524,
"signal/advantage_abs_mean": 0.7388134717941284,
"signal/advantage_pre_scale_abs_mean": 0.0943774089217186,
"signal/advantage_pre_scale_std": 0.15511732697486877,
"signal/advantage_std": 0.9831326723098754,
"signal/brier_reward/centered_abs_mean": 0.12683559954166412,
"signal/brier_reward/group_std_mean": 0.16544119119644166,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1707315742969513,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012683560699224472,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030468913540244103,
"signal/confidence_uniqueness_reward/group_std_mean": 0.049927102774381636,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04096878692507744,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003046891512349248,
"signal/format_reward/centered_abs_mean": 0.02019314244389534,
"signal/format_reward/group_std_mean": 0.03766540549695492,
"signal/format_reward/group_zero_std_frac": 0.8444444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1350069150328636,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01009657122194767,
"signal/frontier_coverage_0/centered_abs_mean": 0.1795959234237671,
"signal/frontier_coverage_0/group_std_mean": 0.23311225175857545,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.034363172575831415,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002568221651017666,
"signal/frontier_coverage_1/centered_abs_mean": 0.1795959234237671,
"signal/frontier_coverage_1/group_std_mean": 0.23311225175857545,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.034363172575831415,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002568221651017666,
"signal/frontier_coverage_10/centered_abs_mean": 0.10517283678054809,
"signal/frontier_coverage_10/group_std_mean": 0.1382586717605591,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.020169777423143388,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015039715450257062,
"signal/frontier_coverage_15/centered_abs_mean": 0.062341035902500154,
"signal/frontier_coverage_15/group_std_mean": 0.07913636118173599,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012101586163043975,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008914768346585334,
"signal/frontier_coverage_20/centered_abs_mean": 0.08644651770591735,
"signal/frontier_coverage_20/group_std_mean": 0.11047539860010147,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016777468286454676,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001236185198649764,
"signal/frontier_coverage_25/centered_abs_mean": 0.12307052761316299,
"signal/frontier_coverage_25/group_std_mean": 0.1577708601951599,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.023849079757928847,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017599085113033652,
"signal/frontier_coverage_5/centered_abs_mean": 0.1793459564447403,
"signal/frontier_coverage_5/group_std_mean": 0.23279379010200502,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0343147799372673,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025646470487117766,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3164542317390442,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38684444427490233,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4268608748912811,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03164542391896248,
"step": 345
},
{
"calibration/aurc": 0.20994078951499792,
"calibration/batch_distribution_entropy": 0.9538957528776107,
"calibration/buffer_distribution_entropy": 0.9870970000368399,
"calibration/confidence_entropy": 0.44064173248762045,
"calibration/coverage@0%": 0.01697276681996627,
"calibration/coverage@1%": 0.01697276681996627,
"calibration/coverage@10%": 0.2595149251508039,
"calibration/coverage@15%": 0.41668076479409083,
"calibration/coverage@20%": 0.5067220732085298,
"calibration/coverage@25%": 0.582640842086777,
"calibration/coverage@30%": 0.7986849580843456,
"calibration/coverage@5%": 0.076308216555416,
"calibration/ece": 0.15867194406599533,
"calibration/mean_confidence": 0.5516345775419665,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017100694444444443,
"completions/max_length": 3999.0,
"completions/max_terminated_length": 3999.0,
"completions/mean_length": 1023.1324829101562,
"completions/mean_terminated_length": 1040.8033447265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 226.6,
"epoch": 0.8399895001312484,
"grad_norm": 0.0017015208723023534,
"learning_rate": 4.146634615384616e-06,
"loss": -0.048,
"num_tokens": 861985019.0,
"reward": 0.9820253729820252,
"reward_std": 0.12685408145189286,
"rewards/accuracy_reward": 0.6758680701255798,
"rewards/brier_reward": 0.8076221346855164,
"rewards/confidence_uniqueness_reward": 0.9325710296630859,
"rewards/format_reward": 0.9828993082046509,
"rewards/frontier_coverage_0": 0.04194375555962324,
"rewards/frontier_coverage_1": 0.04194375555962324,
"rewards/frontier_coverage_10": 0.04001317657530308,
"rewards/frontier_coverage_15": 0.07637237012386322,
"rewards/frontier_coverage_20": 0.13732877969741822,
"rewards/frontier_coverage_25": 0.21134760677814485,
"rewards/frontier_coverage_5": 0.04197418745607138,
"rewards/frontier_entropy_batch_reward": -0.298277872800827,
"signal/accuracy_reward/centered_abs_mean": 0.13314887136220932,
"signal/accuracy_reward/group_std_mean": 0.17562229335308074,
"signal/accuracy_reward/group_zero_std_frac": 0.5000000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9567326664924621,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06657443568110466,
"signal/advantage_abs_mean": 0.7593642354011536,
"signal/advantage_pre_scale_abs_mean": 0.09298344552516938,
"signal/advantage_pre_scale_std": 0.1581791251897812,
"signal/advantage_std": 0.9830424070358277,
"signal/brier_reward/centered_abs_mean": 0.13524700105190277,
"signal/brier_reward/group_std_mean": 0.1742014318704605,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19523520171642303,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013524701073765754,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.040957468748092654,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0662254698574543,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05914860144257546,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004095746856182814,
"signal/format_reward/centered_abs_mean": 0.02872721329331398,
"signal/format_reward/group_std_mean": 0.05186066627502441,
"signal/format_reward/group_zero_std_frac": 0.7972222208976746,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.20710389316082,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01436360664665699,
"signal/frontier_coverage_0/centered_abs_mean": 0.18308203518390656,
"signal/frontier_coverage_0/group_std_mean": 0.23565957844257354,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03774779662489891,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002618073020130396,
"signal/frontier_coverage_1/centered_abs_mean": 0.18308203518390656,
"signal/frontier_coverage_1/group_std_mean": 0.23565957844257354,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03774779662489891,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002618073020130396,
"signal/frontier_coverage_10/centered_abs_mean": 0.10456371903419495,
"signal/frontier_coverage_10/group_std_mean": 0.1368136912584305,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.021551913022994994,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014952612109482288,
"signal/frontier_coverage_15/centered_abs_mean": 0.0707410454750061,
"signal/frontier_coverage_15/group_std_mean": 0.08790467828512191,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01462008450180292,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010115969693288208,
"signal/frontier_coverage_20/centered_abs_mean": 0.09556236267089843,
"signal/frontier_coverage_20/group_std_mean": 0.11978346407413483,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019759462401270866,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013665418140590192,
"signal/frontier_coverage_25/centered_abs_mean": 0.13080873787403108,
"signal/frontier_coverage_25/group_std_mean": 0.16497585475444793,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0270388450473547,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018705649999901652,
"signal/frontier_coverage_5/centered_abs_mean": 0.1828676700592041,
"signal/frontier_coverage_5/group_std_mean": 0.23539845943450927,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.037703678011894226,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026150076184421776,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3392969012260437,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40456579327583314,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.49072799682617185,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033929687738418576,
"step": 350
},
{
"epoch": 0.8399895001312484,
"eval_calibration/aurc": 0.12627949306525968,
"eval_calibration/batch_distribution_entropy": 0.9467912754909852,
"eval_calibration/buffer_distribution_entropy": 0.9876254154039424,
"eval_calibration/confidence_entropy": 0.48752701565040085,
"eval_calibration/coverage@0%": 0.322244623655914,
"eval_calibration/coverage@1%": 0.322244623655914,
"eval_calibration/coverage@10%": 0.5063844086021505,
"eval_calibration/coverage@15%": 0.7624327956989246,
"eval_calibration/coverage@20%": 0.8098118279569894,
"eval_calibration/coverage@25%": 0.8882728494623656,
"eval_calibration/coverage@30%": 0.9519489247311829,
"eval_calibration/coverage@5%": 0.322244623655914,
"eval_calibration/ece": 0.2574667368716044,
"eval_calibration/mean_confidence": 0.5234268010988257,
"eval_completions/clipped_ratio": 0.01631944444444446,
"eval_completions/max_length": 3784.5,
"eval_completions/max_terminated_length": 3784.5,
"eval_completions/mean_length": 1010.5425516764323,
"eval_completions/mean_terminated_length": 1027.3308817545574,
"eval_completions/min_length": 0.0,
"eval_completions/min_terminated_length": 273.8333333333333,
"eval_loss": 0.0,
"eval_num_tokens": 861985019.0,
"eval_reward": 0.9114697674910227,
"eval_reward_std": 0.24290815244118372,
"eval_rewards/accuracy_reward": 0.6866319378217062,
"eval_rewards/brier_reward": 0.7930839558442434,
"eval_rewards/confidence_uniqueness_reward": 0.8857908844947815,
"eval_rewards/format_reward": 0.984375,
"eval_rewards/frontier_coverage_0": 0.01952519454061985,
"eval_rewards/frontier_coverage_1": 0.01952519454061985,
"eval_rewards/frontier_coverage_10": 0.0334686745579044,
"eval_rewards/frontier_coverage_15": 0.06455122741560142,
"eval_rewards/frontier_coverage_20": 0.11642149960001309,
"eval_rewards/frontier_coverage_25": 0.18265460431575775,
"eval_rewards/frontier_coverage_5": 0.019537934257338446,
"eval_rewards/frontier_entropy_batch_reward": -0.984375,
"eval_runtime": 213.2572,
"eval_samples_per_second": 4.689,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4206271717945735,
"eval_signal/accuracy_reward/group_std_mean": 0.4648505250612895,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8802718718846639,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21031358589728674,
"eval_signal/advantage_abs_mean": 0.8569314777851105,
"eval_signal/advantage_pre_scale_abs_mean": 0.20814315478006998,
"eval_signal/advantage_pre_scale_std": 0.24182888368765512,
"eval_signal/advantage_std": 0.9864102900028229,
"eval_signal/brier_reward/centered_abs_mean": 0.19394498566786447,
"eval_signal/brier_reward/group_std_mean": 0.24899733563264212,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08124354109168053,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01939449831843376,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.057598222667972244,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09713333596785863,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023997636511921883,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005759822437539697,
"eval_signal/format_reward/centered_abs_mean": 0.029622395678112905,
"eval_signal/format_reward/group_std_mean": 0.07291496824473143,
"eval_signal/format_reward/group_zero_std_frac": 0.6388889104127884,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.06088021490722895,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.014811197839056453,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.31392258902390796,
"eval_signal/frontier_coverage_0/group_std_mean": 0.4262712150812149,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.018851852975785732,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004489093010003368,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.31392258902390796,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4262712150812149,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.018851852975785732,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004489093010003368,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.17115350315968195,
"eval_signal/frontier_coverage_10/group_std_mean": 0.24345367650190988,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.010294714787354073,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024474948877468705,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.08757202078898747,
"eval_signal/frontier_coverage_15/group_std_mean": 0.10957717150449753,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005260932492092252,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012522799079306424,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.1454414650797844,
"eval_signal/frontier_coverage_20/group_std_mean": 0.18414875119924545,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.008730871990943948,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020798128486300507,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.22669106721878052,
"eval_signal/frontier_coverage_25/group_std_mean": 0.28089650968710583,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.013599606230854988,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003241682231115798,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.31356939673423767,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4258475701014201,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.018830711642901104,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004484042447681229,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.029622395678112905,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.07291496824473143,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6388889104127884,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012176043431585034,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0029622397075096765,
"eval_steps_per_second": 0.028,
"step": 350
},
{
"epoch": 0.8399895001312484,
"step": 350,
"train_probe_calibration/aurc": 0.11138316671294553,
"train_probe_calibration/batch_distribution_entropy": 0.9328936521878086,
"train_probe_calibration/buffer_distribution_entropy": 0.987729842542267,
"train_probe_calibration/confidence_entropy": 0.5000671971525904,
"train_probe_calibration/coverage@0%": 0.22043010752688172,
"train_probe_calibration/coverage@1%": 0.22043010752688172,
"train_probe_calibration/coverage@10%": 0.6088709677419355,
"train_probe_calibration/coverage@15%": 0.8383736559139785,
"train_probe_calibration/coverage@20%": 0.9114583333333334,
"train_probe_calibration/coverage@25%": 0.9583333333333334,
"train_probe_calibration/coverage@30%": 0.9895833333333334,
"train_probe_calibration/coverage@5%": 0.36290322580645157,
"train_probe_calibration/ece": 0.28476146068548386,
"train_probe_calibration/mean_confidence": 0.5098099438844086,
"train_probe_completions/clipped_ratio": 0.014756944444444439,
"train_probe_completions/max_length": 3701.6666666666665,
"train_probe_completions/max_terminated_length": 3701.6666666666665,
"train_probe_completions/mean_length": 989.0814412434896,
"train_probe_completions/mean_terminated_length": 1003.9476216634115,
"train_probe_completions/min_length": 93.66666666666667,
"train_probe_completions/min_terminated_length": 246.66666666666666,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 861985019.0,
"train_probe_reward": 0.9344007472197214,
"train_probe_reward_std": 0.23137953132390976,
"train_probe_rewards/accuracy_reward": 0.7387152711550394,
"train_probe_rewards/brier_reward": 0.7828520933787028,
"train_probe_rewards/confidence_uniqueness_reward": 0.8851476311683655,
"train_probe_rewards/format_reward": 0.9843750099341074,
"train_probe_rewards/frontier_coverage_0": -0.02593635581433773,
"train_probe_rewards/frontier_coverage_1": -0.02593635581433773,
"train_probe_rewards/frontier_coverage_10": 0.005999071678767602,
"train_probe_rewards/frontier_coverage_15": 0.06303013488650322,
"train_probe_rewards/frontier_coverage_20": 0.12391630684336026,
"train_probe_rewards/frontier_coverage_25": 0.19901238630215326,
"train_probe_rewards/frontier_coverage_5": -0.02588070183992386,
"train_probe_rewards/frontier_entropy_batch_reward": -0.9843750099341074,
"train_probe_runtime": 207.6057,
"train_probe_samples_per_second": 4.817,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.3740776975949605,
"train_probe_signal/accuracy_reward/group_std_mean": 0.43718187014261883,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.830837219953537,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18703884879748026,
"train_probe_signal/advantage_abs_mean": 0.8095610837141672,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.18753594905138016,
"train_probe_signal/advantage_pre_scale_std": 0.23162239789962769,
"train_probe_signal/advantage_std": 0.9863859911759695,
"train_probe_signal/brier_reward/centered_abs_mean": 0.20244234800338745,
"train_probe_signal/brier_reward/group_std_mean": 0.26038682212432224,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08980598424871762,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.020244235793749493,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05636486907800039,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.09551115706562996,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024982516343394916,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005636486845711867,
"train_probe_signal/format_reward/centered_abs_mean": 0.029296875620881718,
"train_probe_signal/format_reward/group_std_mean": 0.06911189792056878,
"train_probe_signal/format_reward/group_zero_std_frac": 0.6666666865348816,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.06429031708588202,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.014648437810440859,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.3012530356645584,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.42430545886357623,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01914732779065768,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004307918444586297,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3012530356645584,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.42430545886357623,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01914732779065768,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004307918444586297,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.16096202532450357,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.24077540387709936,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.010228140590091547,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023017569134632745,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.08790385474761327,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.11155568187435468,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005583847174420953,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001257025171071291,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.14556232343117395,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.18222308903932571,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.009244945365935564,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020815412087055543,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.2230932116508484,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.27396292984485626,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0141687939564387,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003190232984100779,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.30094441771507263,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.42391479512055713,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.019127743629117806,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004303505294956267,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.029296875620881718,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.06911189792056878,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6666666865348816,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012858063836271564,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.00292968771342809,
"train_probe_steps_per_second": 0.029
},
{
"calibration/aurc": 0.20218502123502455,
"calibration/batch_distribution_entropy": 0.9779656063892196,
"calibration/buffer_distribution_entropy": 0.9880120078431807,
"calibration/confidence_entropy": 0.5058543228286638,
"calibration/coverage@0%": 0.018292267448295645,
"calibration/coverage@1%": 0.018292267448295645,
"calibration/coverage@10%": 0.38723870443511277,
"calibration/coverage@15%": 0.47008130188731256,
"calibration/coverage@20%": 0.5248855388816154,
"calibration/coverage@25%": 0.602678489200584,
"calibration/coverage@30%": 0.7387349490230253,
"calibration/coverage@5%": 0.13471608262392168,
"calibration/ece": 0.15619698133977333,
"calibration/mean_confidence": 0.543017593254968,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125000000002,
"completions/max_length": 4024.0,
"completions/max_terminated_length": 4024.0,
"completions/mean_length": 1027.3447265625,
"completions/mean_terminated_length": 1048.2111450195312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 224.4,
"epoch": 0.8519893501331234,
"grad_norm": 0.0016697923419997096,
"learning_rate": 4.116586538461539e-06,
"loss": -0.0577,
"num_tokens": 876918622.0,
"reward": 0.9857487201690673,
"reward_std": 0.1384602814912796,
"rewards/accuracy_reward": 0.6854166626930237,
"rewards/brier_reward": 0.7945013403892517,
"rewards/confidence_uniqueness_reward": 0.9326841950416564,
"rewards/format_reward": 0.9798611164093017,
"rewards/frontier_coverage_0": 0.014988563163205982,
"rewards/frontier_coverage_1": 0.014988563163205982,
"rewards/frontier_coverage_10": 0.029185665771365166,
"rewards/frontier_coverage_15": 0.06560450792312622,
"rewards/frontier_coverage_20": 0.12084827721118926,
"rewards/frontier_coverage_25": 0.18861164450645446,
"rewards/frontier_coverage_5": 0.015013675601221622,
"rewards/frontier_entropy_batch_reward": -0.2603289097547531,
"signal/accuracy_reward/centered_abs_mean": 0.1457356780767441,
"signal/accuracy_reward/group_std_mean": 0.19056201577186585,
"signal/accuracy_reward/group_zero_std_frac": 0.46111111640930175,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9496296286582947,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07286783903837205,
"signal/advantage_abs_mean": 0.750654423236847,
"signal/advantage_pre_scale_abs_mean": 0.10265205949544906,
"signal/advantage_pre_scale_std": 0.1708187222480774,
"signal/advantage_std": 0.9831824183464051,
"signal/brier_reward/centered_abs_mean": 0.14322828352451325,
"signal/brier_reward/group_std_mean": 0.18320123255252838,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1866349160671234,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014322828128933906,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04218879491090775,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06747839152812958,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05476883351802826,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004218879528343678,
"signal/format_reward/centered_abs_mean": 0.03328993059694767,
"signal/format_reward/group_std_mean": 0.057012468576431274,
"signal/format_reward/group_zero_std_frac": 0.7833333492279053,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.21570837497711182,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.016644965298473834,
"signal/frontier_coverage_0/centered_abs_mean": 0.18814339339733124,
"signal/frontier_coverage_0/group_std_mean": 0.24020065665245055,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03511350601911545,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026904504746198656,
"signal/frontier_coverage_1/centered_abs_mean": 0.18814339339733124,
"signal/frontier_coverage_1/group_std_mean": 0.24020065665245055,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03511350601911545,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026904504746198656,
"signal/frontier_coverage_10/centered_abs_mean": 0.11119063049554825,
"signal/frontier_coverage_10/group_std_mean": 0.14428375512361527,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.020754556357860564,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015900259371846915,
"signal/frontier_coverage_15/centered_abs_mean": 0.06850891709327697,
"signal/frontier_coverage_15/group_std_mean": 0.08640409409999847,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012759264931082726,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009796775411814451,
"signal/frontier_coverage_20/centered_abs_mean": 0.09464813470840454,
"signal/frontier_coverage_20/group_std_mean": 0.12077843248844147,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01762023866176605,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013534683734178544,
"signal/frontier_coverage_25/centered_abs_mean": 0.1316935181617737,
"signal/frontier_coverage_25/group_std_mean": 0.16893566846847535,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.024517284706234932,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018832173896953463,
"signal/frontier_coverage_5/centered_abs_mean": 0.18792597949504852,
"signal/frontier_coverage_5/group_std_mean": 0.23992567658424377,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03507302924990654,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002687341393902898,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3207733452320099,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39081095457077025,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.41783658862113954,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03207733556628227,
"step": 355
},
{
"calibration/aurc": 0.11303180426621968,
"calibration/batch_distribution_entropy": 0.9234944837461085,
"calibration/buffer_distribution_entropy": 0.9878215954898579,
"calibration/confidence_entropy": 0.49597159820928355,
"calibration/coverage@0%": 0.03945796090860965,
"calibration/coverage@1%": 0.138416294241943,
"calibration/coverage@10%": 0.6530974518273713,
"calibration/coverage@15%": 0.7242895348491992,
"calibration/coverage@20%": 0.772423903710264,
"calibration/coverage@25%": 0.8850370047066054,
"calibration/coverage@30%": 0.9324324324324325,
"calibration/coverage@5%": 0.44373066635011693,
"calibration/ece": 0.12756140317825732,
"calibration/mean_confidence": 0.643185101661414,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.019704861111111093,
"completions/max_length": 3997.8,
"completions/max_terminated_length": 3997.8,
"completions/mean_length": 999.0853271484375,
"completions/mean_terminated_length": 1019.1470458984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 202.4,
"epoch": 0.8639892001349984,
"grad_norm": 0.0016646608710289001,
"learning_rate": 4.086538461538462e-06,
"loss": -0.054,
"num_tokens": 891516405.0,
"reward": 0.9785197615623474,
"reward_std": 0.14136494994163512,
"rewards/accuracy_reward": 0.6750868082046508,
"rewards/brier_reward": 0.8187860488891602,
"rewards/confidence_uniqueness_reward": 0.9283985733985901,
"rewards/format_reward": 0.9799479246139526,
"rewards/frontier_coverage_0": 0.04092637412250042,
"rewards/frontier_coverage_1": 0.04092637412250042,
"rewards/frontier_coverage_10": 0.042030976712703706,
"rewards/frontier_coverage_15": 0.0748762235045433,
"rewards/frontier_coverage_20": 0.13637623935937881,
"rewards/frontier_coverage_25": 0.2098323732614517,
"rewards/frontier_coverage_5": 0.040917468070983884,
"rewards/frontier_entropy_batch_reward": -0.3209426164627075,
"signal/accuracy_reward/centered_abs_mean": 0.1537706136703491,
"signal/accuracy_reward/group_std_mean": 0.1990972250699997,
"signal/accuracy_reward/group_zero_std_frac": 0.44166666865348814,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0244532942771911,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07688530683517455,
"signal/advantage_abs_mean": 0.7572015047073364,
"signal/advantage_pre_scale_abs_mean": 0.10625196099281312,
"signal/advantage_pre_scale_std": 0.17508123219013214,
"signal/advantage_std": 0.9831428170204163,
"signal/brier_reward/centered_abs_mean": 0.13040018677711487,
"signal/brier_reward/group_std_mean": 0.16888347268104553,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17432362139225005,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013040019199252129,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04242881685495377,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06739743649959565,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05661019757390022,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00424288185313344,
"signal/format_reward/centered_abs_mean": 0.03179796040058136,
"signal/format_reward/group_std_mean": 0.054969260841608046,
"signal/format_reward/group_zero_std_frac": 0.7861111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.21095921397209166,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01589898020029068,
"signal/frontier_coverage_0/centered_abs_mean": 0.16192724108695983,
"signal/frontier_coverage_0/group_std_mean": 0.2089155375957489,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030903545394539832,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002315559471026063,
"signal/frontier_coverage_1/centered_abs_mean": 0.16192724108695983,
"signal/frontier_coverage_1/group_std_mean": 0.2089155375957489,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030903545394539832,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002315559471026063,
"signal/frontier_coverage_10/centered_abs_mean": 0.09113808274269104,
"signal/frontier_coverage_10/group_std_mean": 0.11921153515577317,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017393879033625124,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013032745802775026,
"signal/frontier_coverage_15/centered_abs_mean": 0.06823997497558594,
"signal/frontier_coverage_15/group_std_mean": 0.08615372478961944,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013093681819736958,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009758316911756993,
"signal/frontier_coverage_20/centered_abs_mean": 0.10009044259786606,
"signal/frontier_coverage_20/group_std_mean": 0.1269907459616661,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01922752782702446,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014312933897599577,
"signal/frontier_coverage_25/centered_abs_mean": 0.14133644700050355,
"signal/frontier_coverage_25/group_std_mean": 0.17950156033039094,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02715020589530468,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020211110822856426,
"signal/frontier_coverage_5/centered_abs_mean": 0.16168299615383147,
"signal/frontier_coverage_5/group_std_mean": 0.2086247146129608,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030856142193078993,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002312066778540611,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32869015336036683,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.393494176864624,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4426185369491577,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03286901637911797,
"step": 360
},
{
"calibration/aurc": 0.10046656835849702,
"calibration/batch_distribution_entropy": 0.9755306822700215,
"calibration/buffer_distribution_entropy": 0.9876695227054582,
"calibration/confidence_entropy": 0.4657519076394254,
"calibration/coverage@0%": 0.2736829420052594,
"calibration/coverage@1%": 0.3068851405653573,
"calibration/coverage@10%": 0.6568735419839828,
"calibration/coverage@15%": 0.7042328981637536,
"calibration/coverage@20%": 0.753031806065644,
"calibration/coverage@25%": 0.851407047876506,
"calibration/coverage@30%": 0.8967798974982386,
"calibration/coverage@5%": 0.5671302818644433,
"calibration/ece": 0.22066731060189587,
"calibration/mean_confidence": 0.5431307836996118,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02265624999999998,
"completions/max_length": 4056.0,
"completions/max_terminated_length": 4056.0,
"completions/mean_length": 1015.9993286132812,
"completions/mean_terminated_length": 1039.5235473632813,
"completions/min_length": 0.0,
"completions/min_terminated_length": 187.4,
"epoch": 0.8759890501368733,
"grad_norm": 0.001823436003178358,
"learning_rate": 4.0564903846153846e-06,
"loss": -0.0594,
"num_tokens": 906318413.0,
"reward": 0.9893832921981811,
"reward_std": 0.13554426431655883,
"rewards/accuracy_reward": 0.7008680462837219,
"rewards/brier_reward": 0.7875616431236268,
"rewards/confidence_uniqueness_reward": 0.9278796195983887,
"rewards/format_reward": 0.9766493082046509,
"rewards/frontier_coverage_0": 0.007047331100329757,
"rewards/frontier_coverage_1": 0.007047331100329757,
"rewards/frontier_coverage_10": 0.025778009090572596,
"rewards/frontier_coverage_15": 0.0655012458562851,
"rewards/frontier_coverage_20": 0.12075587064027786,
"rewards/frontier_coverage_25": 0.18987874686717987,
"rewards/frontier_coverage_5": 0.007139163976535201,
"rewards/frontier_entropy_batch_reward": -0.2697057068347931,
"signal/accuracy_reward/centered_abs_mean": 0.13690320998430253,
"signal/accuracy_reward/group_std_mean": 0.18647956252098083,
"signal/accuracy_reward/group_zero_std_frac": 0.45000000596046447,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9336233973503113,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06845160499215126,
"signal/advantage_abs_mean": 0.7356076240539551,
"signal/advantage_pre_scale_abs_mean": 0.0975809395313263,
"signal/advantage_pre_scale_std": 0.1690650999546051,
"signal/advantage_std": 0.9831165671348572,
"signal/brier_reward/centered_abs_mean": 0.1374752402305603,
"signal/brier_reward/group_std_mean": 0.17852137982845306,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1881021738052368,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013747524283826352,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04467645138502121,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0713021658360958,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.061282969266176227,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004467645334079862,
"signal/format_reward/centered_abs_mean": 0.03498806394636631,
"signal/format_reward/group_std_mean": 0.05998894199728966,
"signal/format_reward/group_zero_std_frac": 0.7750000119209289,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.24012745022773743,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.017494031973183156,
"signal/frontier_coverage_0/centered_abs_mean": 0.18660827577114106,
"signal/frontier_coverage_0/group_std_mean": 0.24346633851528168,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03638794124126434,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026684983633458613,
"signal/frontier_coverage_1/centered_abs_mean": 0.18660827577114106,
"signal/frontier_coverage_1/group_std_mean": 0.24346633851528168,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03638794124126434,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026684983633458613,
"signal/frontier_coverage_10/centered_abs_mean": 0.1076819583773613,
"signal/frontier_coverage_10/group_std_mean": 0.14231389611959458,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.020962074026465417,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001539852051064372,
"signal/frontier_coverage_15/centered_abs_mean": 0.06423577815294265,
"signal/frontier_coverage_15/group_std_mean": 0.08117228299379349,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012597080320119858,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00091857158113271,
"signal/frontier_coverage_20/centered_abs_mean": 0.08381548821926117,
"signal/frontier_coverage_20/group_std_mean": 0.1063159465789795,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016472844406962395,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011985614662989974,
"signal/frontier_coverage_25/centered_abs_mean": 0.11446720212697983,
"signal/frontier_coverage_25/group_std_mean": 0.14590185582637788,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.022491169348359107,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016368810553103685,
"signal/frontier_coverage_5/centered_abs_mean": 0.18639478087425232,
"signal/frontier_coverage_5/group_std_mean": 0.2431890696287155,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.036346501857042315,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002665445441380143,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31751392483711244,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3888779103755951,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4340252041816711,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03175139352679253,
"step": 365
},
{
"calibration/aurc": 0.11075577590842926,
"calibration/batch_distribution_entropy": 0.8718789410455423,
"calibration/buffer_distribution_entropy": 0.9877933843635548,
"calibration/confidence_entropy": 0.4901921855875632,
"calibration/coverage@0%": 0.1376387343102201,
"calibration/coverage@1%": 0.19133080350082063,
"calibration/coverage@10%": 0.6036193234453195,
"calibration/coverage@15%": 0.8161315365363011,
"calibration/coverage@20%": 0.8846361185983828,
"calibration/coverage@25%": 0.9013477088948786,
"calibration/coverage@30%": 0.910512129380054,
"calibration/coverage@5%": 0.38877242550776914,
"calibration/ece": 0.14343248950202986,
"calibration/mean_confidence": 0.6587681828657079,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012586805555555558,
"completions/max_length": 4009.4,
"completions/max_terminated_length": 4009.4,
"completions/mean_length": 944.9483520507813,
"completions/mean_terminated_length": 957.1441284179688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 227.6,
"epoch": 0.8879889001387483,
"grad_norm": 0.001997613813728094,
"learning_rate": 4.026442307692308e-06,
"loss": -0.0314,
"num_tokens": 920275482.0,
"reward": 1.0175946712493897,
"reward_std": 0.11938634067773819,
"rewards/accuracy_reward": 0.7550347208976745,
"rewards/brier_reward": 0.8396147847175598,
"rewards/confidence_uniqueness_reward": 0.929998254776001,
"rewards/format_reward": 0.9874131798744201,
"rewards/frontier_coverage_0": 0.008122816309332847,
"rewards/frontier_coverage_1": 0.008122816309332847,
"rewards/frontier_coverage_10": 0.028084695525467395,
"rewards/frontier_coverage_15": 0.09159668684005737,
"rewards/frontier_coverage_20": 0.17193578481674193,
"rewards/frontier_coverage_25": 0.2666388005018234,
"rewards/frontier_coverage_5": 0.008194121345877648,
"rewards/frontier_entropy_batch_reward": -0.3892317533493042,
"signal/accuracy_reward/centered_abs_mean": 0.12407768964767456,
"signal/accuracy_reward/group_std_mean": 0.16893844306468964,
"signal/accuracy_reward/group_zero_std_frac": 0.5000000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9724384427070618,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06203884482383728,
"signal/advantage_abs_mean": 0.7490497827529907,
"signal/advantage_pre_scale_abs_mean": 0.08648647367954254,
"signal/advantage_pre_scale_std": 0.1495155483484268,
"signal/advantage_std": 0.9829149723052979,
"signal/brier_reward/centered_abs_mean": 0.11033552289009094,
"signal/brier_reward/group_std_mean": 0.1457503229379654,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17304804623126985,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011033552512526513,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.034914476424455644,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05499633625149727,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05407209992408753,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034914476331323386,
"signal/format_reward/centered_abs_mean": 0.02003580741584301,
"signal/format_reward/group_std_mean": 0.037604504451155665,
"signal/format_reward/group_zero_std_frac": 0.8472222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1530803084373474,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010017903707921504,
"signal/frontier_coverage_0/centered_abs_mean": 0.13898501694202423,
"signal/frontier_coverage_0/group_std_mean": 0.17986855208873748,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.031086085736751555,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019874857971444726,
"signal/frontier_coverage_1/centered_abs_mean": 0.13898501694202423,
"signal/frontier_coverage_1/group_std_mean": 0.17986855208873748,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.031086085736751555,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019874857971444726,
"signal/frontier_coverage_10/centered_abs_mean": 0.07780203223228455,
"signal/frontier_coverage_10/group_std_mean": 0.10243205726146698,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017435486987233163,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001112569053657353,
"signal/frontier_coverage_15/centered_abs_mean": 0.06910406351089478,
"signal/frontier_coverage_15/group_std_mean": 0.08591423332691192,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015592486225068569,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009881880832836032,
"signal/frontier_coverage_20/centered_abs_mean": 0.10108875632286071,
"signal/frontier_coverage_20/group_std_mean": 0.12666354775428773,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022827718779444693,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014455692144110799,
"signal/frontier_coverage_25/centered_abs_mean": 0.1385332614183426,
"signal/frontier_coverage_25/group_std_mean": 0.17508584558963775,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03128995075821876,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019810255384072662,
"signal/frontier_coverage_5/centered_abs_mean": 0.13876722007989883,
"signal/frontier_coverage_5/group_std_mean": 0.1796049416065216,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.031037060543894768,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001984371221624315,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34756895899772644,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.408492773771286,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.54748575091362,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03475689888000488,
"step": 370
},
{
"calibration/aurc": 0.16836747731521123,
"calibration/batch_distribution_entropy": 0.9409667328624938,
"calibration/buffer_distribution_entropy": 0.9858811949340381,
"calibration/confidence_entropy": 0.5013783566039679,
"calibration/coverage@0%": 0.01783872765233107,
"calibration/coverage@1%": 0.01783872765233107,
"calibration/coverage@10%": 0.24995211403058035,
"calibration/coverage@15%": 0.5976058346179306,
"calibration/coverage@20%": 0.7031263919516597,
"calibration/coverage@25%": 0.8019207635514969,
"calibration/coverage@30%": 0.8845246697506749,
"calibration/coverage@5%": 0.09129267502075213,
"calibration/ece": 0.16091990937012796,
"calibration/mean_confidence": 0.6130512914764606,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.007378472222222187,
"completions/max_length": 3958.2,
"completions/max_terminated_length": 3958.2,
"completions/mean_length": 985.8460327148438,
"completions/mean_terminated_length": 992.9516845703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 232.8,
"epoch": 0.8999887501406233,
"grad_norm": 0.0019466127268970013,
"learning_rate": 3.996394230769231e-06,
"loss": -0.0099,
"num_tokens": 934735820.0,
"reward": 0.9989883065223694,
"reward_std": 0.11320204883813859,
"rewards/accuracy_reward": 0.6914930582046509,
"rewards/brier_reward": 0.8222344160079956,
"rewards/confidence_uniqueness_reward": 0.9439424037933349,
"rewards/format_reward": 0.9927951335906983,
"rewards/frontier_coverage_0": 0.03563723305705935,
"rewards/frontier_coverage_1": 0.035642618965357545,
"rewards/frontier_coverage_10": 0.044866102561354634,
"rewards/frontier_coverage_15": 0.07400252968072892,
"rewards/frontier_coverage_20": 0.13133004158735276,
"rewards/frontier_coverage_25": 0.20458360016345978,
"rewards/frontier_coverage_5": 0.03570191371254623,
"rewards/frontier_entropy_batch_reward": -0.2780673325061798,
"signal/accuracy_reward/centered_abs_mean": 0.13141276091337203,
"signal/accuracy_reward/group_std_mean": 0.1752326160669327,
"signal/accuracy_reward/group_zero_std_frac": 0.48888888359069826,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9961167454719544,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06570638045668602,
"signal/advantage_abs_mean": 0.7583129167556762,
"signal/advantage_pre_scale_abs_mean": 0.08607572019100189,
"signal/advantage_pre_scale_std": 0.13993633836507796,
"signal/advantage_std": 0.9829720020294189,
"signal/brier_reward/centered_abs_mean": 0.11517563909292221,
"signal/brier_reward/group_std_mean": 0.14917479753494262,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17474163174629212,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01151756402105093,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023243167623877525,
"signal/confidence_uniqueness_reward/group_std_mean": 0.036514821276068685,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03521691001951695,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023243167670443654,
"signal/format_reward/centered_abs_mean": 0.011572265811264516,
"signal/format_reward/group_std_mean": 0.022318005003035067,
"signal/format_reward/group_zero_std_frac": 0.9027777791023255,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08761402815580369,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005786132905632258,
"signal/frontier_coverage_0/centered_abs_mean": 0.1577325791120529,
"signal/frontier_coverage_0/group_std_mean": 0.20713994801044464,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03425569087266922,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022555758245289327,
"signal/frontier_coverage_1/centered_abs_mean": 0.15770569443702698,
"signal/frontier_coverage_1/group_std_mean": 0.20710653066635132,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.034249893575906756,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002255191374570131,
"signal/frontier_coverage_10/centered_abs_mean": 0.08623393028974533,
"signal/frontier_coverage_10/group_std_mean": 0.11464256942272186,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01874492093920708,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012331451987847685,
"signal/frontier_coverage_15/centered_abs_mean": 0.0666267767548561,
"signal/frontier_coverage_15/group_std_mean": 0.08374895453453064,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014445245079696179,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009527628892101348,
"signal/frontier_coverage_20/centered_abs_mean": 0.09474294930696488,
"signal/frontier_coverage_20/group_std_mean": 0.11888675689697266,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020516883209347726,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001354824099689722,
"signal/frontier_coverage_25/centered_abs_mean": 0.13197840452194215,
"signal/frontier_coverage_25/group_std_mean": 0.16576823592185974,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028572235628962515,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001887291227467358,
"signal/frontier_coverage_5/centered_abs_mean": 0.1573409467935562,
"signal/frontier_coverage_5/group_std_mean": 0.20665175020694732,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.034170858934521674,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002249975502490997,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32009653449058534,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3921052277088165,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.48509649038314817,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032009654119610785,
"step": 375
},
{
"calibration/aurc": 0.16092519711536962,
"calibration/batch_distribution_entropy": 0.9423709566617757,
"calibration/buffer_distribution_entropy": 0.9849338657064634,
"calibration/confidence_entropy": 0.48836660732999615,
"calibration/coverage@0%": 0.07674090848546708,
"calibration/coverage@1%": 0.09743056365788086,
"calibration/coverage@10%": 0.3124138291737335,
"calibration/coverage@15%": 0.5342731730034984,
"calibration/coverage@20%": 0.6844617418251927,
"calibration/coverage@25%": 0.7673174485999794,
"calibration/coverage@30%": 0.9796584880636605,
"calibration/coverage@5%": 0.2446315302305752,
"calibration/ece": 0.16269622681227508,
"calibration/mean_confidence": 0.6199632640210615,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.007204861111111116,
"completions/max_length": 3954.0,
"completions/max_terminated_length": 3954.0,
"completions/mean_length": 975.0330810546875,
"completions/mean_terminated_length": 982.1337524414063,
"completions/min_length": 0.0,
"completions/min_terminated_length": 201.8,
"epoch": 0.9119886001424983,
"grad_norm": 0.0019179882947355509,
"learning_rate": 3.966346153846154e-06,
"loss": -0.0094,
"num_tokens": 949099241.0,
"reward": 1.011078429222107,
"reward_std": 0.11091785579919815,
"rewards/accuracy_reward": 0.7229166746139526,
"rewards/brier_reward": 0.8134139537811279,
"rewards/confidence_uniqueness_reward": 0.9428596019744873,
"rewards/format_reward": 0.9926215171813965,
"rewards/frontier_coverage_0": 0.004921641945838928,
"rewards/frontier_coverage_1": 0.004925927333533764,
"rewards/frontier_coverage_10": 0.01868428089655936,
"rewards/frontier_coverage_15": 0.07345463410019874,
"rewards/frontier_coverage_20": 0.13654196113348008,
"rewards/frontier_coverage_25": 0.2155262529850006,
"rewards/frontier_coverage_5": 0.0050234109163284305,
"rewards/frontier_entropy_batch_reward": -0.28882819712162017,
"signal/accuracy_reward/centered_abs_mean": 0.12810329645872115,
"signal/accuracy_reward/group_std_mean": 0.17452663481235503,
"signal/accuracy_reward/group_zero_std_frac": 0.4805555701255798,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9722905874252319,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06405164822936057,
"signal/advantage_abs_mean": 0.754323148727417,
"signal/advantage_pre_scale_abs_mean": 0.08285565674304962,
"signal/advantage_pre_scale_std": 0.13604794144630433,
"signal/advantage_std": 0.9829652667045593,
"signal/brier_reward/centered_abs_mean": 0.11799997389316559,
"signal/brier_reward/group_std_mean": 0.15085006952285768,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17907191216945648,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01179999802261591,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024614708870649336,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03595021180808544,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03735269904136658,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002461470989510417,
"signal/format_reward/centered_abs_mean": 0.011593967117369175,
"signal/format_reward/group_std_mean": 0.019906727969646452,
"signal/format_reward/group_zero_std_frac": 0.9222222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08723903000354767,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005796983558684588,
"signal/frontier_coverage_0/centered_abs_mean": 0.16741606891155242,
"signal/frontier_coverage_0/group_std_mean": 0.21475327610969544,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036335456371307376,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023940497543662787,
"signal/frontier_coverage_1/centered_abs_mean": 0.16740790605545045,
"signal/frontier_coverage_1/group_std_mean": 0.21474320888519288,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03633376285433769,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023939330130815506,
"signal/frontier_coverage_10/centered_abs_mean": 0.09415251165628433,
"signal/frontier_coverage_10/group_std_mean": 0.12194554954767227,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.020430530607700347,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001346380915492773,
"signal/frontier_coverage_15/centered_abs_mean": 0.06532250344753265,
"signal/frontier_coverage_15/group_std_mean": 0.08159894198179245,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014241785556077958,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009341117809526622,
"signal/frontier_coverage_20/centered_abs_mean": 0.08967762291431428,
"signal/frontier_coverage_20/group_std_mean": 0.11331800818443298,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01956898979842663,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001282389950938523,
"signal/frontier_coverage_25/centered_abs_mean": 0.12472088485956193,
"signal/frontier_coverage_25/group_std_mean": 0.15883929431438445,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027211637794971467,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017835085978731512,
"signal/frontier_coverage_5/centered_abs_mean": 0.16699602901935579,
"signal/frontier_coverage_5/group_std_mean": 0.21422846913337706,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03624384626746178,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002388043189421296,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3240382134914398,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39173341989517213,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.49400672912597654,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032403822615742685,
"step": 380
},
{
"calibration/aurc": 0.19064716793554867,
"calibration/batch_distribution_entropy": 0.9369254339434014,
"calibration/buffer_distribution_entropy": 0.9836490401269389,
"calibration/confidence_entropy": 0.495918887964904,
"calibration/coverage@0%": 0.02263309099950029,
"calibration/coverage@1%": 0.02263309099950029,
"calibration/coverage@10%": 0.31466428353582415,
"calibration/coverage@15%": 0.4292183774827497,
"calibration/coverage@20%": 0.6041198609363091,
"calibration/coverage@25%": 0.7517649378676992,
"calibration/coverage@30%": 0.8218858491876129,
"calibration/coverage@5%": 0.16429975766616695,
"calibration/ece": 0.1446770208112851,
"calibration/mean_confidence": 0.5884134436310122,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008680555555555535,
"completions/max_length": 3991.6,
"completions/max_terminated_length": 3991.6,
"completions/mean_length": 1074.7923828125,
"completions/mean_terminated_length": 1084.2991943359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 211.0,
"epoch": 0.9239884501443731,
"grad_norm": 0.0019680929835885763,
"learning_rate": 3.936298076923077e-06,
"loss": -0.0255,
"num_tokens": 964584497.0,
"reward": 0.9990208506584167,
"reward_std": 0.11245020627975463,
"rewards/accuracy_reward": 0.7045138835906982,
"rewards/brier_reward": 0.8249103784561157,
"rewards/confidence_uniqueness_reward": 0.9390157103538513,
"rewards/format_reward": 0.9913194298744201,
"rewards/frontier_coverage_0": 0.02678363719023764,
"rewards/frontier_coverage_1": 0.02678363719023764,
"rewards/frontier_coverage_10": 0.033601064234972,
"rewards/frontier_coverage_15": 0.07888007164001465,
"rewards/frontier_coverage_20": 0.14335883557796478,
"rewards/frontier_coverage_25": 0.22403789162635804,
"rewards/frontier_coverage_5": 0.026947683235630394,
"rewards/frontier_entropy_batch_reward": -0.333020281791687,
"signal/accuracy_reward/centered_abs_mean": 0.11980251967906952,
"signal/accuracy_reward/group_std_mean": 0.16275928020477295,
"signal/accuracy_reward/group_zero_std_frac": 0.5194444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9279258489608765,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05990125983953476,
"signal/advantage_abs_mean": 0.7537263393402099,
"signal/advantage_pre_scale_abs_mean": 0.08232245296239853,
"signal/advantage_pre_scale_std": 0.14088055938482286,
"signal/advantage_std": 0.9829261660575866,
"signal/brier_reward/centered_abs_mean": 0.11604090929031372,
"signal/brier_reward/group_std_mean": 0.1511075794696808,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1813347041606903,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01160409115254879,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02827602457255125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.046187874674797055,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.043776792287826535,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002827602461911738,
"signal/format_reward/centered_abs_mean": 0.01551649336470291,
"signal/format_reward/group_std_mean": 0.03089729677885771,
"signal/format_reward/group_zero_std_frac": 0.8694444417953491,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.11859939582645893,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007758246682351455,
"signal/frontier_coverage_0/centered_abs_mean": 0.1470422476530075,
"signal/frontier_coverage_0/group_std_mean": 0.193260794878006,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.032766058668494226,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021027040667831896,
"signal/frontier_coverage_1/centered_abs_mean": 0.1470422476530075,
"signal/frontier_coverage_1/group_std_mean": 0.193260794878006,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.032766058668494226,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021027040667831896,
"signal/frontier_coverage_10/centered_abs_mean": 0.08149942010641098,
"signal/frontier_coverage_10/group_std_mean": 0.10828516483306885,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.018163814209401608,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001165441726334393,
"signal/frontier_coverage_15/centered_abs_mean": 0.06997437477111816,
"signal/frontier_coverage_15/group_std_mean": 0.08664604872465134,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015636095218360423,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010006335913203656,
"signal/frontier_coverage_20/centered_abs_mean": 0.10055458694696426,
"signal/frontier_coverage_20/group_std_mean": 0.12494523078203201,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022478773444890975,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014379305765032768,
"signal/frontier_coverage_25/centered_abs_mean": 0.1401418536901474,
"signal/frontier_coverage_25/group_std_mean": 0.17512567937374116,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.031318724155426025,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020040284842252733,
"signal/frontier_coverage_5/centered_abs_mean": 0.14670295566320418,
"signal/frontier_coverage_5/group_std_mean": 0.1928351879119873,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032691262662410736,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020978521322831513,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.338723349571228,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4019467055797577,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5293410301208497,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03387233465909958,
"step": 385
},
{
"calibration/aurc": 0.131628539126266,
"calibration/batch_distribution_entropy": 0.9687453953559209,
"calibration/buffer_distribution_entropy": 0.984400946136547,
"calibration/confidence_entropy": 0.4489368869309242,
"calibration/coverage@0%": 0.10330473885184659,
"calibration/coverage@1%": 0.10330473885184659,
"calibration/coverage@10%": 0.49364135574786017,
"calibration/coverage@15%": 0.5628806538752541,
"calibration/coverage@20%": 0.7786696103466213,
"calibration/coverage@25%": 0.8854545709793351,
"calibration/coverage@30%": 0.9405983265947888,
"calibration/coverage@5%": 0.30226593902208354,
"calibration/ece": 0.2354010264798198,
"calibration/mean_confidence": 0.4826527057723909,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.007552083333333348,
"completions/max_length": 4029.8,
"completions/max_terminated_length": 4029.8,
"completions/mean_length": 1078.279345703125,
"completions/mean_terminated_length": 1086.5503173828124,
"completions/min_length": 0.0,
"completions/min_terminated_length": 222.4,
"epoch": 0.9359883001462481,
"grad_norm": 0.001781121944077313,
"learning_rate": 3.90625e-06,
"loss": -0.0095,
"num_tokens": 980115523.0,
"reward": 1.0005874991416932,
"reward_std": 0.10873806923627853,
"rewards/accuracy_reward": 0.6994791626930237,
"rewards/brier_reward": 0.8032535314559937,
"rewards/confidence_uniqueness_reward": 0.9435503482818604,
"rewards/format_reward": 0.9924479126930237,
"rewards/frontier_coverage_0": 0.024669825052842497,
"rewards/frontier_coverage_1": 0.024669825052842497,
"rewards/frontier_coverage_10": 0.03370050191879272,
"rewards/frontier_coverage_15": 0.07053077518939972,
"rewards/frontier_coverage_20": 0.12623442858457565,
"rewards/frontier_coverage_25": 0.20001912415027617,
"rewards/frontier_coverage_5": 0.024624837329611182,
"rewards/frontier_entropy_batch_reward": -0.27270071804523466,
"signal/accuracy_reward/centered_abs_mean": 0.1298828125,
"signal/accuracy_reward/group_std_mean": 0.17255926728248597,
"signal/accuracy_reward/group_zero_std_frac": 0.5083333432674408,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0192892909049989,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06494140625,
"signal/advantage_abs_mean": 0.7568698048591613,
"signal/advantage_pre_scale_abs_mean": 0.08172477781772614,
"signal/advantage_pre_scale_std": 0.1360209256410599,
"signal/advantage_std": 0.9829101800918579,
"signal/brier_reward/centered_abs_mean": 0.1270618975162506,
"signal/brier_reward/group_std_mean": 0.16338178813457488,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20035703480243683,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012706190161406995,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02496674135327339,
"signal/confidence_uniqueness_reward/group_std_mean": 0.038834089413285255,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03995698355138302,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024966741679236294,
"signal/format_reward/centered_abs_mean": 0.013047960214316845,
"signal/format_reward/group_std_mean": 0.02432667538523674,
"signal/format_reward/group_zero_std_frac": 0.9027777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.105986687541008,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006523980107158422,
"signal/frontier_coverage_0/centered_abs_mean": 0.19202699661254882,
"signal/frontier_coverage_0/group_std_mean": 0.2493561327457428,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04318622797727585,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027459860779345036,
"signal/frontier_coverage_1/centered_abs_mean": 0.19202699661254882,
"signal/frontier_coverage_1/group_std_mean": 0.2493561327457428,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04318622797727585,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027459860779345036,
"signal/frontier_coverage_10/centered_abs_mean": 0.09750582873821259,
"signal/frontier_coverage_10/group_std_mean": 0.12793900519609452,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.021928153187036514,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013943333411589264,
"signal/frontier_coverage_15/centered_abs_mean": 0.06799988895654678,
"signal/frontier_coverage_15/group_std_mean": 0.0844225361943245,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015377411991357804,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.000972398417070508,
"signal/frontier_coverage_20/centered_abs_mean": 0.08751165270805358,
"signal/frontier_coverage_20/group_std_mean": 0.10887247174978257,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01980235055088997,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012514166766777635,
"signal/frontier_coverage_25/centered_abs_mean": 0.1190670147538185,
"signal/frontier_coverage_25/group_std_mean": 0.14888398349285126,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.026936568692326544,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017026583664119244,
"signal/frontier_coverage_5/centered_abs_mean": 0.19167168736457824,
"signal/frontier_coverage_5/group_std_mean": 0.24889355897903442,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.043106149137020114,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002740905107930303,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31426780223846434,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38453606367111204,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.496795254945755,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03142678178846836,
"step": 390
},
{
"calibration/aurc": 0.12006739177635935,
"calibration/batch_distribution_entropy": 0.9621003417898498,
"calibration/buffer_distribution_entropy": 0.9850575871337772,
"calibration/confidence_entropy": 0.4640747669149102,
"calibration/coverage@0%": 0.12258103953836168,
"calibration/coverage@1%": 0.1806893877947558,
"calibration/coverage@10%": 0.5286890507883714,
"calibration/coverage@15%": 0.6967522164070671,
"calibration/coverage@20%": 0.7996866238435315,
"calibration/coverage@25%": 0.8778432408702359,
"calibration/coverage@30%": 0.9411440961700013,
"calibration/coverage@5%": 0.3512544272331463,
"calibration/ece": 0.13712931379974697,
"calibration/mean_confidence": 0.5683317673173456,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012239583333333326,
"completions/max_length": 4045.0,
"completions/max_terminated_length": 4045.0,
"completions/mean_length": 1162.4337890625,
"completions/mean_terminated_length": 1176.98583984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 252.0,
"epoch": 0.9479881501481231,
"grad_norm": 0.0016314678359776735,
"learning_rate": 3.876201923076923e-06,
"loss": -0.0318,
"num_tokens": 996640808.0,
"reward": 0.9922902107238769,
"reward_std": 0.11627082526683807,
"rewards/accuracy_reward": 0.6799479246139526,
"rewards/brier_reward": 0.8181400537490845,
"rewards/confidence_uniqueness_reward": 0.9398838996887207,
"rewards/format_reward": 0.9876736044883728,
"rewards/frontier_coverage_0": 0.04872595062479377,
"rewards/frontier_coverage_1": 0.04872595062479377,
"rewards/frontier_coverage_10": 0.04636274129152298,
"rewards/frontier_coverage_15": 0.07839905470609665,
"rewards/frontier_coverage_20": 0.13514964878559113,
"rewards/frontier_coverage_25": 0.20990723073482515,
"rewards/frontier_coverage_5": 0.04886599145829677,
"rewards/frontier_entropy_batch_reward": -0.2613369792699814,
"signal/accuracy_reward/centered_abs_mean": 0.12535264641046523,
"signal/accuracy_reward/group_std_mean": 0.16798610091209412,
"signal/accuracy_reward/group_zero_std_frac": 0.5083333373069763,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9664302825927734,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06267632320523261,
"signal/advantage_abs_mean": 0.7464218854904174,
"signal/advantage_pre_scale_abs_mean": 0.08505754321813583,
"signal/advantage_pre_scale_std": 0.14605462849140166,
"signal/advantage_std": 0.9829274535179138,
"signal/brier_reward/centered_abs_mean": 0.12280103266239166,
"signal/brier_reward/group_std_mean": 0.1588895171880722,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18971530497074127,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01228010393679142,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030538421869277955,
"signal/confidence_uniqueness_reward/group_std_mean": 0.050799714773893355,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.047202929854393005,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030538421124219894,
"signal/format_reward/centered_abs_mean": 0.02004123255610466,
"signal/format_reward/group_std_mean": 0.03813575953245163,
"signal/format_reward/group_zero_std_frac": 0.8416666626930237,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.153868405520916,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01002061627805233,
"signal/frontier_coverage_0/centered_abs_mean": 0.17273998260498047,
"signal/frontier_coverage_0/group_std_mean": 0.22136751115322112,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.038172660022974016,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002470181742683053,
"signal/frontier_coverage_1/centered_abs_mean": 0.17273998260498047,
"signal/frontier_coverage_1/group_std_mean": 0.22136751115322112,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.038172660022974016,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002470181742683053,
"signal/frontier_coverage_10/centered_abs_mean": 0.09057945162057876,
"signal/frontier_coverage_10/group_std_mean": 0.11731237322092056,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.019993556663393974,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012952861143276095,
"signal/frontier_coverage_15/centered_abs_mean": 0.06736490577459335,
"signal/frontier_coverage_15/group_std_mean": 0.0837602436542511,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015023627690970898,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009633181616663933,
"signal/frontier_coverage_20/centered_abs_mean": 0.0906154453754425,
"signal/frontier_coverage_20/group_std_mean": 0.11392967402935028,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020246949046850204,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012958008563145995,
"signal/frontier_coverage_25/centered_abs_mean": 0.12485620528459548,
"signal/frontier_coverage_25/group_std_mean": 0.1578374296426773,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027895611152052878,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001785443676635623,
"signal/frontier_coverage_5/centered_abs_mean": 0.17239981293678283,
"signal/frontier_coverage_5/group_std_mean": 0.2209311842918396,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.038096596300601956,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002465317351743579,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30889744162559507,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3786299705505371,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4820574581623077,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03088974431157112,
"step": 395
},
{
"calibration/aurc": 0.1328948076275825,
"calibration/batch_distribution_entropy": 0.9624684539519253,
"calibration/buffer_distribution_entropy": 0.9846569152425998,
"calibration/confidence_entropy": 0.49333697436117746,
"calibration/coverage@0%": 0.12847558820296975,
"calibration/coverage@1%": 0.14436599916187387,
"calibration/coverage@10%": 0.5365831021921853,
"calibration/coverage@15%": 0.6413618744077871,
"calibration/coverage@20%": 0.6948443389641212,
"calibration/coverage@25%": 0.8872661576030986,
"calibration/coverage@30%": 0.9440488461282639,
"calibration/coverage@5%": 0.4174874290234561,
"calibration/ece": 0.171763445811933,
"calibration/mean_confidence": 0.5922798731831165,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012065972222222231,
"completions/max_length": 4021.0,
"completions/max_terminated_length": 4021.0,
"completions/mean_length": 1121.1360595703125,
"completions/mean_terminated_length": 1134.825244140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 252.6,
"epoch": 0.9599880001499981,
"grad_norm": 0.001862908829934895,
"learning_rate": 3.846153846153847e-06,
"loss": -0.0404,
"num_tokens": 1012627495.0,
"reward": 0.9898022294044495,
"reward_std": 0.12157986909151078,
"rewards/accuracy_reward": 0.6822048664093018,
"rewards/brier_reward": 0.8187066793441773,
"rewards/confidence_uniqueness_reward": 0.9391976833343506,
"rewards/format_reward": 0.9879340291023254,
"rewards/frontier_coverage_0": 0.0362746462225914,
"rewards/frontier_coverage_1": 0.0362746462225914,
"rewards/frontier_coverage_10": 0.03540731780230999,
"rewards/frontier_coverage_15": 0.07417062669992447,
"rewards/frontier_coverage_20": 0.13375866413116455,
"rewards/frontier_coverage_25": 0.20971741974353791,
"rewards/frontier_coverage_5": 0.03616565503180027,
"rewards/frontier_entropy_batch_reward": -0.29090956449508665,
"signal/accuracy_reward/centered_abs_mean": 0.13099500834941863,
"signal/accuracy_reward/group_std_mean": 0.17723969519138336,
"signal/accuracy_reward/group_zero_std_frac": 0.4777777910232544,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9267226219177246,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06549750417470931,
"signal/advantage_abs_mean": 0.7497529029846192,
"signal/advantage_pre_scale_abs_mean": 0.08834384828805923,
"signal/advantage_pre_scale_std": 0.14922945201396942,
"signal/advantage_std": 0.9830656886100769,
"signal/brier_reward/centered_abs_mean": 0.1201691061258316,
"signal/brier_reward/group_std_mean": 0.15796004235744476,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17042254209518432,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012016911059617996,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031691993772983554,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05179332569241524,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.045310333371162415,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003169199451804161,
"signal/format_reward/centered_abs_mean": 0.020817056857049467,
"signal/format_reward/group_std_mean": 0.03874273598194122,
"signal/format_reward/group_zero_std_frac": 0.8444444417953492,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.14898683726787568,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010408528428524733,
"signal/frontier_coverage_0/centered_abs_mean": 0.16637980341911315,
"signal/frontier_coverage_0/group_std_mean": 0.21566648483276368,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03370913192629814,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023792311549186707,
"signal/frontier_coverage_1/centered_abs_mean": 0.16637980341911315,
"signal/frontier_coverage_1/group_std_mean": 0.21566648483276368,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03370913192629814,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023792311549186707,
"signal/frontier_coverage_10/centered_abs_mean": 0.08630841374397277,
"signal/frontier_coverage_10/group_std_mean": 0.11361690014600753,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01745337210595608,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001234210329130292,
"signal/frontier_coverage_15/centered_abs_mean": 0.06675836741924286,
"signal/frontier_coverage_15/group_std_mean": 0.08330333530902863,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013568529859185219,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009546446264721454,
"signal/frontier_coverage_20/centered_abs_mean": 0.0917587623000145,
"signal/frontier_coverage_20/group_std_mean": 0.11500014364719391,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018640580773353576,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013121502473950387,
"signal/frontier_coverage_25/centered_abs_mean": 0.1281261071562767,
"signal/frontier_coverage_25/group_std_mean": 0.16147857010364533,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02601141035556793,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018322034273296594,
"signal/frontier_coverage_5/centered_abs_mean": 0.1659935176372528,
"signal/frontier_coverage_5/group_std_mean": 0.21517403721809386,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.033628907054662704,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002373707154765725,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32937549352645873,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39777472615242004,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4694278180599213,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03293755128979683,
"step": 400
},
{
"epoch": 0.9599880001499981,
"eval_calibration/aurc": 0.10586284667182204,
"eval_calibration/batch_distribution_entropy": 0.9507627214676093,
"eval_calibration/buffer_distribution_entropy": 0.9840021499390136,
"eval_calibration/confidence_entropy": 0.5006953442721606,
"eval_calibration/coverage@0%": 0.379872311827957,
"eval_calibration/coverage@1%": 0.379872311827957,
"eval_calibration/coverage@10%": 0.5278897849462365,
"eval_calibration/coverage@15%": 0.6759072580645161,
"eval_calibration/coverage@20%": 0.899361559139785,
"eval_calibration/coverage@25%": 0.967741935483871,
"eval_calibration/coverage@30%": 0.9946236559139785,
"eval_calibration/coverage@5%": 0.4275873655913978,
"eval_calibration/ece": 0.2627307963709677,
"eval_calibration/mean_confidence": 0.5663138272849463,
"eval_completions/clipped_ratio": 0.01128472222222221,
"eval_completions/max_length": 3346.8333333333335,
"eval_completions/max_terminated_length": 3346.8333333333335,
"eval_completions/mean_length": 1125.2504069010417,
"eval_completions/mean_terminated_length": 1138.199727376302,
"eval_completions/min_length": 70.0,
"eval_completions/min_terminated_length": 323.1666666666667,
"eval_loss": 0.0,
"eval_num_tokens": 1012627495.0,
"eval_reward": 0.9165649116039276,
"eval_reward_std": 0.23890877763430277,
"eval_rewards/accuracy_reward": 0.6935763955116272,
"eval_rewards/brier_reward": 0.8002510666847229,
"eval_rewards/confidence_uniqueness_reward": 0.8845955729484558,
"eval_rewards/format_reward": 0.9869791567325592,
"eval_rewards/frontier_coverage_0": 0.014418061745042602,
"eval_rewards/frontier_coverage_1": 0.014418061745042602,
"eval_rewards/frontier_coverage_10": 0.030281184454603743,
"eval_rewards/frontier_coverage_15": 0.06701069946090381,
"eval_rewards/frontier_coverage_20": 0.12111099312702815,
"eval_rewards/frontier_coverage_25": 0.19284088909626007,
"eval_rewards/frontier_coverage_5": 0.01449225222071012,
"eval_rewards/frontier_entropy_batch_reward": -0.9869791567325592,
"eval_runtime": 214.8674,
"eval_samples_per_second": 4.654,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4076063384612401,
"eval_signal/accuracy_reward/group_std_mean": 0.456951508919398,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8707355658213297,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20380316923062006,
"eval_signal/advantage_abs_mean": 0.8465096652507782,
"eval_signal/advantage_pre_scale_abs_mean": 0.20322489738464355,
"eval_signal/advantage_pre_scale_std": 0.23819045225779215,
"eval_signal/advantage_std": 0.986402283112208,
"eval_signal/brier_reward/centered_abs_mean": 0.1865755319595337,
"eval_signal/brier_reward/group_std_mean": 0.2435737227400144,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07968846708536148,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.018657553009688854,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05388018364707629,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09592856466770172,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02302951893458764,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005388018364707629,
"eval_signal/format_reward/centered_abs_mean": 0.025119357431928318,
"eval_signal/format_reward/group_std_mean": 0.07066754686335723,
"eval_signal/format_reward/group_zero_std_frac": 0.6111111243565878,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.053442043562730156,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.012559678715964159,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.29021725555260974,
"eval_signal/frontier_coverage_0/group_std_mean": 0.3992450336615245,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01775054633617401,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0041501066492249565,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.29021725555260974,
"eval_signal/frontier_coverage_1/group_std_mean": 0.3992450336615245,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01775054633617401,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0041501066492249565,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.14385747288664183,
"eval_signal/frontier_coverage_10/group_std_mean": 0.20860673983891806,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.008807006757706404,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002057161880657077,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.08661755422751109,
"eval_signal/frontier_coverage_15/group_std_mean": 0.11023381600777309,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005300398683175445,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012386311039639015,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.14894455671310425,
"eval_signal/frontier_coverage_20/group_std_mean": 0.18669422467549643,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.009113150803993145,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021299070601041117,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.23310701549053192,
"eval_signal/frontier_coverage_25/group_std_mean": 0.28737075130144757,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.014257684350013733,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003333430349205931,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2895810604095459,
"eval_signal/frontier_coverage_5/group_std_mean": 0.39846739172935486,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.017711769479016464,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004141009141070147,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.025119357431928318,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.07066754686335723,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6111111243565878,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010688409054030975,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.002511935851847132,
"eval_steps_per_second": 0.028,
"step": 400
},
{
"epoch": 0.9599880001499981,
"step": 400,
"train_probe_calibration/aurc": 0.14781498801845364,
"train_probe_calibration/batch_distribution_entropy": 0.9258537799413419,
"train_probe_calibration/buffer_distribution_entropy": 0.9840014136744348,
"train_probe_calibration/confidence_entropy": 0.5174560087088435,
"train_probe_calibration/coverage@0%": 0.1831317204301075,
"train_probe_calibration/coverage@1%": 0.1831317204301075,
"train_probe_calibration/coverage@10%": 0.4227150537634408,
"train_probe_calibration/coverage@15%": 0.709845430107527,
"train_probe_calibration/coverage@20%": 0.8366935483870969,
"train_probe_calibration/coverage@25%": 0.931619623655914,
"train_probe_calibration/coverage@30%": 0.9842069892473119,
"train_probe_calibration/coverage@5%": 0.1831317204301075,
"train_probe_calibration/ece": 0.24389171706989246,
"train_probe_calibration/mean_confidence": 0.5452770329301075,
"train_probe_completions/clipped_ratio": 0.013715277777777776,
"train_probe_completions/max_length": 3625.8333333333335,
"train_probe_completions/max_terminated_length": 3625.8333333333335,
"train_probe_completions/mean_length": 1138.3854370117188,
"train_probe_completions/mean_terminated_length": 1154.0794677734375,
"train_probe_completions/min_length": 42.333333333333336,
"train_probe_completions/min_terminated_length": 249.33333333333334,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 1012627495.0,
"train_probe_reward": 0.9373580018679301,
"train_probe_reward_std": 0.22980502992868423,
"train_probe_rewards/accuracy_reward": 0.7317708333333334,
"train_probe_rewards/brier_reward": 0.81206147869428,
"train_probe_rewards/confidence_uniqueness_reward": 0.8892526924610138,
"train_probe_rewards/format_reward": 0.9887152711550394,
"train_probe_rewards/frontier_coverage_0": -0.0023442222348724804,
"train_probe_rewards/frontier_coverage_1": -0.0023442222348724804,
"train_probe_rewards/frontier_coverage_10": 0.021256126773854096,
"train_probe_rewards/frontier_coverage_15": 0.06604259957869847,
"train_probe_rewards/frontier_coverage_20": 0.12492299949129422,
"train_probe_rewards/frontier_coverage_25": 0.20412471145391464,
"train_probe_rewards/frontier_coverage_5": -0.0022142972253883877,
"train_probe_rewards/frontier_entropy_batch_reward": -0.9887152711550394,
"train_probe_runtime": 208.4445,
"train_probe_samples_per_second": 4.797,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.3841688384612401,
"train_probe_signal/accuracy_reward/group_std_mean": 0.44367093841234845,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8549265762170156,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.19208441923062006,
"train_probe_signal/advantage_abs_mean": 0.8276252249876658,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.1909246121843656,
"train_probe_signal/advantage_pre_scale_std": 0.22932683179775873,
"train_probe_signal/advantage_std": 0.9863846600055695,
"train_probe_signal/brier_reward/centered_abs_mean": 0.1636638417840004,
"train_probe_signal/brier_reward/group_std_mean": 0.22015838821729025,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07289181649684906,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.016366383992135525,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05082453042268753,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.08612562467654546,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02260653271029393,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0050824531354010105,
"train_probe_signal/format_reward/centered_abs_mean": 0.021647135416666668,
"train_probe_signal/format_reward/group_std_mean": 0.057857210437456764,
"train_probe_signal/format_reward/group_zero_std_frac": 0.694444457689921,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.04728267093499502,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.010823567708333334,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.2758088956276576,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.3786073128382365,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.017573293919364612,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003944066935218871,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2758088956276576,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.3786073128382365,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.017573293919364612,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003944066935218871,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.13124024122953415,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.19279029220342636,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.008360948336000243,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018767355165133874,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.07852580770850182,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.10188833996653557,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00499945521975557,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011229190470961232,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.13784591356913248,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.17390990008910498,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.008773462225993475,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019711965966659286,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.21520801385243735,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.26701483378807706,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.013698053235809008,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003077474539168179,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.2750549068053563,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.37767767409483594,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0175249179204305,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0039332850913827615,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.021647135416666668,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.057857210437456764,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.694444457689921,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.009456534404307604,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0021647136115158596,
"train_probe_steps_per_second": 0.029
},
{
"calibration/aurc": 0.16543027853562334,
"calibration/batch_distribution_entropy": 0.9582248127058456,
"calibration/buffer_distribution_entropy": 0.9839146582790985,
"calibration/confidence_entropy": 0.5125660195708808,
"calibration/coverage@0%": 0.02267736859919655,
"calibration/coverage@1%": 0.02267736859919655,
"calibration/coverage@10%": 0.22190620223407792,
"calibration/coverage@15%": 0.5628588181484097,
"calibration/coverage@20%": 0.7633469933576739,
"calibration/coverage@25%": 0.8386309124767225,
"calibration/coverage@30%": 0.9379307262569831,
"calibration/coverage@5%": 0.07492748067138952,
"calibration/ece": 0.13882747421097189,
"calibration/mean_confidence": 0.5993457340070647,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.016579861111111094,
"completions/max_length": 4040.6,
"completions/max_terminated_length": 4040.6,
"completions/mean_length": 1116.2814453125,
"completions/mean_terminated_length": 1135.14482421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 217.0,
"epoch": 0.9719878501518731,
"grad_norm": 0.001691743964329362,
"learning_rate": 3.81610576923077e-06,
"loss": -0.0443,
"num_tokens": 1028573649.0,
"reward": 0.9947227001190185,
"reward_std": 0.1256895914673805,
"rewards/accuracy_reward": 0.7001736164093018,
"rewards/brier_reward": 0.8061349272727967,
"rewards/confidence_uniqueness_reward": 0.9350310444831849,
"rewards/format_reward": 0.9833333253860473,
"rewards/frontier_coverage_0": 0.01580127151682973,
"rewards/frontier_coverage_1": 0.01580127151682973,
"rewards/frontier_coverage_10": 0.029630134254693984,
"rewards/frontier_coverage_15": 0.06885228753089905,
"rewards/frontier_coverage_20": 0.12561193257570266,
"rewards/frontier_coverage_25": 0.1998952865600586,
"rewards/frontier_coverage_5": 0.01589932944625616,
"rewards/frontier_entropy_batch_reward": -0.2788967788219452,
"signal/accuracy_reward/centered_abs_mean": 0.1270507827401161,
"signal/accuracy_reward/group_std_mean": 0.1727653205394745,
"signal/accuracy_reward/group_zero_std_frac": 0.4944444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9054166555404664,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06352539137005805,
"signal/advantage_abs_mean": 0.7439297795295715,
"signal/advantage_pre_scale_abs_mean": 0.09049908965826034,
"signal/advantage_pre_scale_std": 0.1570802301168442,
"signal/advantage_std": 0.9830506086349488,
"signal/brier_reward/centered_abs_mean": 0.12726181447505952,
"signal/brier_reward/group_std_mean": 0.16593731343746185,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18182174265384674,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01272618155926466,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03679776974022388,
"signal/confidence_uniqueness_reward/group_std_mean": 0.060738787055015564,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05204875022172928,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003679777169600129,
"signal/format_reward/centered_abs_mean": 0.026974826864898205,
"signal/format_reward/group_std_mean": 0.049242686852812766,
"signal/format_reward/group_zero_std_frac": 0.8027777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.18914935141801834,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013487413432449103,
"signal/frontier_coverage_0/centered_abs_mean": 0.17137164175510405,
"signal/frontier_coverage_0/group_std_mean": 0.2231689751148224,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03501456528902054,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002450614469125867,
"signal/frontier_coverage_1/centered_abs_mean": 0.17137164175510405,
"signal/frontier_coverage_1/group_std_mean": 0.2231689751148224,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03501456528902054,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002450614469125867,
"signal/frontier_coverage_10/centered_abs_mean": 0.08988010734319687,
"signal/frontier_coverage_10/group_std_mean": 0.11845540404319763,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.018348486348986625,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012852855259552598,
"signal/frontier_coverage_15/centered_abs_mean": 0.06513071209192275,
"signal/frontier_coverage_15/group_std_mean": 0.08108891993761062,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013369975425302983,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009313691407442093,
"signal/frontier_coverage_20/centered_abs_mean": 0.08782992511987686,
"signal/frontier_coverage_20/group_std_mean": 0.10988791435956954,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01804915312677622,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001255967910401523,
"signal/frontier_coverage_25/centered_abs_mean": 0.12174516469240189,
"signal/frontier_coverage_25/group_std_mean": 0.15290275812149048,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02501319572329521,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017409559572115541,
"signal/frontier_coverage_5/centered_abs_mean": 0.17103633284568787,
"signal/frontier_coverage_5/group_std_mean": 0.22273699343204498,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03494622781872749,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024458195082843305,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32988558411598207,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39726953506469725,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.47342961430549624,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03298855870962143,
"step": 405
},
{
"calibration/aurc": 0.16013978437888046,
"calibration/batch_distribution_entropy": 0.9725485062078045,
"calibration/buffer_distribution_entropy": 0.9847711501721669,
"calibration/confidence_entropy": 0.4818931492234226,
"calibration/coverage@0%": 0.00813677654975952,
"calibration/coverage@1%": 0.06125330771507116,
"calibration/coverage@10%": 0.39967957270893634,
"calibration/coverage@15%": 0.5416899920662741,
"calibration/coverage@20%": 0.6648686913581028,
"calibration/coverage@25%": 0.8260833833251107,
"calibration/coverage@30%": 0.9247456032379173,
"calibration/coverage@5%": 0.14571245549803835,
"calibration/ece": 0.17383732274943015,
"calibration/mean_confidence": 0.5179158885787857,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.028298611111111094,
"completions/max_length": 4012.0,
"completions/max_terminated_length": 4012.0,
"completions/mean_length": 1138.8479248046874,
"completions/mean_terminated_length": 1172.7252685546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 261.4,
"epoch": 0.983987700153748,
"grad_norm": 0.0016860616160556674,
"learning_rate": 3.7860576923076927e-06,
"loss": -0.066,
"num_tokens": 1044789945.0,
"reward": 0.9747801542282104,
"reward_std": 0.1305119052529335,
"rewards/accuracy_reward": 0.674913203716278,
"rewards/brier_reward": 0.778337562084198,
"rewards/confidence_uniqueness_reward": 0.9256033182144165,
"rewards/format_reward": 0.9717013955116272,
"rewards/frontier_coverage_0": 0.015211716108024121,
"rewards/frontier_coverage_1": 0.015211716108024121,
"rewards/frontier_coverage_10": 0.02842825446277857,
"rewards/frontier_coverage_15": 0.06544613540172577,
"rewards/frontier_coverage_20": 0.11701254844665528,
"rewards/frontier_coverage_25": 0.18447456359863282,
"rewards/frontier_coverage_5": 0.01528911516070366,
"rewards/frontier_entropy_batch_reward": -0.25228601694107056,
"signal/accuracy_reward/centered_abs_mean": 0.1281629756093025,
"signal/accuracy_reward/group_std_mean": 0.17185668051242828,
"signal/accuracy_reward/group_zero_std_frac": 0.5000000119209289,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9030983686447144,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06408148780465125,
"signal/advantage_abs_mean": 0.741316843032837,
"signal/advantage_pre_scale_abs_mean": 0.09381006360054016,
"signal/advantage_pre_scale_std": 0.1637304425239563,
"signal/advantage_std": 0.9830609083175659,
"signal/brier_reward/centered_abs_mean": 0.14042544662952422,
"signal/brier_reward/group_std_mean": 0.18072171807289122,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1998526006937027,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014042544737458229,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04530714936554432,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07276474684476852,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06441220045089721,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045307148713618515,
"signal/format_reward/centered_abs_mean": 0.03671875,
"signal/format_reward/group_std_mean": 0.06284484639763832,
"signal/format_reward/group_zero_std_frac": 0.7638888835906983,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.26043003499507905,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.018359375,
"signal/frontier_coverage_0/centered_abs_mean": 0.18951506912708282,
"signal/frontier_coverage_0/group_std_mean": 0.24266450405120848,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03843116760253906,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027100653387606144,
"signal/frontier_coverage_1/centered_abs_mean": 0.18951506912708282,
"signal/frontier_coverage_1/group_std_mean": 0.24266450405120848,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03843116760253906,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027100653387606144,
"signal/frontier_coverage_10/centered_abs_mean": 0.10224405527114869,
"signal/frontier_coverage_10/group_std_mean": 0.13257486820220948,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02073887400329113,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001462090015411377,
"signal/frontier_coverage_15/centered_abs_mean": 0.0658559963107109,
"signal/frontier_coverage_15/group_std_mean": 0.08266130387783051,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013465725630521775,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009417407214641571,
"signal/frontier_coverage_20/centered_abs_mean": 0.08396470397710801,
"signal/frontier_coverage_20/group_std_mean": 0.1060999408364296,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017190796695649622,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012006952660158277,
"signal/frontier_coverage_25/centered_abs_mean": 0.11455650329589843,
"signal/frontier_coverage_25/group_std_mean": 0.14572837352752685,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02343129813671112,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001638158014975488,
"signal/frontier_coverage_5/centered_abs_mean": 0.18928508162498475,
"signal/frontier_coverage_5/group_std_mean": 0.24237094819545746,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.038384463638067245,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002706776699051261,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31539603471755984,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3847527980804443,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4506865441799164,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03153960295021534,
"step": 410
},
{
"calibration/aurc": 0.14177884350230602,
"calibration/batch_distribution_entropy": 0.9555859143793043,
"calibration/buffer_distribution_entropy": 0.9847064709840442,
"calibration/confidence_entropy": 0.4622752762053195,
"calibration/coverage@0%": 0.05793784020545868,
"calibration/coverage@1%": 0.1714656916643446,
"calibration/coverage@10%": 0.40535473665354377,
"calibration/coverage@15%": 0.5327791772841384,
"calibration/coverage@20%": 0.6222954391422698,
"calibration/coverage@25%": 0.8344170212765958,
"calibration/coverage@30%": 0.9840425531914894,
"calibration/coverage@5%": 0.3337851415956801,
"calibration/ece": 0.19038391246975522,
"calibration/mean_confidence": 0.5694251855815575,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.025173611111111115,
"completions/max_length": 4014.2,
"completions/max_terminated_length": 4014.2,
"completions/mean_length": 1131.0906982421875,
"completions/mean_terminated_length": 1160.0925537109374,
"completions/min_length": 0.0,
"completions/min_terminated_length": 264.6,
"epoch": 0.995987550155623,
"grad_norm": 0.0018346694996580482,
"learning_rate": 3.756009615384616e-06,
"loss": -0.0671,
"num_tokens": 1060962382.0,
"reward": 0.9879943609237671,
"reward_std": 0.12868785858154297,
"rewards/accuracy_reward": 0.7119791626930236,
"rewards/brier_reward": 0.8049194931983947,
"rewards/confidence_uniqueness_reward": 0.9219497919082642,
"rewards/format_reward": 0.9748263955116272,
"rewards/frontier_coverage_0": 0.009872391540557145,
"rewards/frontier_coverage_1": 0.009872391540557145,
"rewards/frontier_coverage_10": 0.02418987303972244,
"rewards/frontier_coverage_15": 0.08309966027736664,
"rewards/frontier_coverage_20": 0.15105039477348328,
"rewards/frontier_coverage_25": 0.23416467607021332,
"rewards/frontier_coverage_5": 0.009905415773391723,
"rewards/frontier_entropy_batch_reward": -0.35562185049057005,
"signal/accuracy_reward/centered_abs_mean": 0.12139756828546525,
"signal/accuracy_reward/group_std_mean": 0.16576847732067107,
"signal/accuracy_reward/group_zero_std_frac": 0.5055555582046509,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.945546567440033,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06069878414273262,
"signal/advantage_abs_mean": 0.7494885206222535,
"signal/advantage_pre_scale_abs_mean": 0.09371411204338073,
"signal/advantage_pre_scale_std": 0.16432504653930663,
"signal/advantage_std": 0.9829301834106445,
"signal/brier_reward/centered_abs_mean": 0.13123094588518142,
"signal/brier_reward/group_std_mean": 0.16721619367599488,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20494545698165895,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013123095408082009,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04638012982904911,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06866296231746674,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.07306440323591232,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004638013010844588,
"signal/format_reward/centered_abs_mean": 0.03458116371184587,
"signal/format_reward/group_std_mean": 0.05476707965135574,
"signal/format_reward/group_zero_std_frac": 0.8083333492279052,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2730853110551834,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.017290581855922936,
"signal/frontier_coverage_0/centered_abs_mean": 0.15530972182750702,
"signal/frontier_coverage_0/group_std_mean": 0.1991082549095154,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03483571857213974,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022209289949387313,
"signal/frontier_coverage_1/centered_abs_mean": 0.15530972182750702,
"signal/frontier_coverage_1/group_std_mean": 0.1991082549095154,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03483571857213974,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022209289949387313,
"signal/frontier_coverage_10/centered_abs_mean": 0.08007914274930954,
"signal/frontier_coverage_10/group_std_mean": 0.10351646095514297,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01798994392156601,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001145131781231612,
"signal/frontier_coverage_15/centered_abs_mean": 0.07401133924722672,
"signal/frontier_coverage_15/group_std_mean": 0.09164203703403473,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01649995595216751,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010583621682599188,
"signal/frontier_coverage_20/centered_abs_mean": 0.1030519425868988,
"signal/frontier_coverage_20/group_std_mean": 0.12880417853593826,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02288637273013592,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014736427692696452,
"signal/frontier_coverage_25/centered_abs_mean": 0.1408381074666977,
"signal/frontier_coverage_25/group_std_mean": 0.17753379344940184,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03124292306602001,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002013984927907586,
"signal/frontier_coverage_5/centered_abs_mean": 0.15515292882919313,
"signal/frontier_coverage_5/group_std_mean": 0.19891518354415894,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03480110689997673,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022186868358403445,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34138706922531126,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40525283217430114,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.532107800245285,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03413870632648468,
"step": 415
},
{
"calibration/aurc": 0.1294395860511989,
"calibration/batch_distribution_entropy": 0.9733127958603607,
"calibration/buffer_distribution_entropy": 0.9851089148524013,
"calibration/confidence_entropy": 0.5005167237576094,
"calibration/coverage@0%": 0.07778799529760441,
"calibration/coverage@1%": 0.15989325845549915,
"calibration/coverage@10%": 0.4693895026329297,
"calibration/coverage@15%": 0.6701725069086626,
"calibration/coverage@20%": 0.7768016760018321,
"calibration/coverage@25%": 0.8552195307166761,
"calibration/coverage@30%": 0.924125391823582,
"calibration/coverage@5%": 0.3879996654336711,
"calibration/ece": 0.1661169260612711,
"calibration/mean_confidence": 0.5599344990323887,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.018923611111111117,
"completions/max_length": 3409.0,
"completions/max_terminated_length": 3409.0,
"completions/mean_length": 1049.7198974609375,
"completions/mean_terminated_length": 1071.5993408203126,
"completions/min_length": 118.2,
"completions/min_terminated_length": 345.2,
"epoch": 1.0095998800015,
"grad_norm": 0.0021356670185923576,
"learning_rate": 3.725961538461539e-06,
"loss": -0.0564,
"num_tokens": 1076905659.0,
"reward": 0.9978170990943909,
"reward_std": 0.13530487269163133,
"rewards/accuracy_reward": 0.7213541507720947,
"rewards/brier_reward": 0.7981032013893128,
"rewards/confidence_uniqueness_reward": 0.9236820220947266,
"rewards/format_reward": 0.9710069417953491,
"rewards/frontier_coverage_0": -0.001099248230457306,
"rewards/frontier_coverage_1": -0.001099248230457306,
"rewards/frontier_coverage_10": 0.024827991053462027,
"rewards/frontier_coverage_15": 0.07666746973991394,
"rewards/frontier_coverage_20": 0.14054252803325654,
"rewards/frontier_coverage_25": 0.22130533158779145,
"rewards/frontier_coverage_5": -0.0010948097333312035,
"rewards/frontier_entropy_batch_reward": -0.2712071597576141,
"signal/accuracy_reward/centered_abs_mean": 0.14091796875,
"signal/accuracy_reward/group_std_mean": 0.1860247492790222,
"signal/accuracy_reward/group_zero_std_frac": 0.4777777910232544,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9652868151664734,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.070458984375,
"signal/advantage_abs_mean": 0.7590166449546814,
"signal/advantage_pre_scale_abs_mean": 0.10070272982120514,
"signal/advantage_pre_scale_std": 0.16899282336235047,
"signal/advantage_std": 0.9830989837646484,
"signal/brier_reward/centered_abs_mean": 0.13238780647516252,
"signal/brier_reward/group_std_mean": 0.16885415315628052,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18241050839424133,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01323878075927496,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04513030052185059,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06864937618374825,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.062384354323148726,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004513029893860221,
"signal/format_reward/centered_abs_mean": 0.03574218712747097,
"signal/format_reward/group_std_mean": 0.057401788979768754,
"signal/format_reward/group_zero_std_frac": 0.7972222208976746,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2461713194847107,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.017871093563735486,
"signal/frontier_coverage_0/centered_abs_mean": 0.18035004138946534,
"signal/frontier_coverage_0/group_std_mean": 0.23116945028305053,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035538754612207415,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002579005528241396,
"signal/frontier_coverage_1/centered_abs_mean": 0.18035004138946534,
"signal/frontier_coverage_1/group_std_mean": 0.23116945028305053,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035538754612207415,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002579005528241396,
"signal/frontier_coverage_10/centered_abs_mean": 0.08763528019189834,
"signal/frontier_coverage_10/group_std_mean": 0.11366626918315888,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01727503500878811,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012531845597550273,
"signal/frontier_coverage_15/centered_abs_mean": 0.06749407202005386,
"signal/frontier_coverage_15/group_std_mean": 0.08479798883199692,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013384480029344559,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009651652304455638,
"signal/frontier_coverage_20/centered_abs_mean": 0.09091575294733048,
"signal/frontier_coverage_20/group_std_mean": 0.11489048898220063,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018049764446914196,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013000952312722802,
"signal/frontier_coverage_25/centered_abs_mean": 0.12579586505889892,
"signal/frontier_coverage_25/group_std_mean": 0.15938990116119384,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.024969753250479697,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001798880915157497,
"signal/frontier_coverage_5/centered_abs_mean": 0.18029914498329164,
"signal/frontier_coverage_5/group_std_mean": 0.23110443353652954,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03552853986620903,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025782777462154626,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31849284172058107,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.389286732673645,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4434321105480194,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03184928484261036,
"step": 420
},
{
"calibration/aurc": 0.1664382462609737,
"calibration/batch_distribution_entropy": 0.9444575270215367,
"calibration/buffer_distribution_entropy": 0.9855649264562201,
"calibration/confidence_entropy": 0.49403454100795496,
"calibration/coverage@0%": 0.13590126976763067,
"calibration/coverage@1%": 0.1441885625853102,
"calibration/coverage@10%": 0.18859511087385578,
"calibration/coverage@15%": 0.45050341103324143,
"calibration/coverage@20%": 0.7215650787918753,
"calibration/coverage@25%": 0.8493212377597885,
"calibration/coverage@30%": 0.946567388963566,
"calibration/coverage@5%": 0.16297309297205056,
"calibration/ece": 0.13733663184872985,
"calibration/mean_confidence": 0.6200478468294505,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01944444444444442,
"completions/max_length": 3989.8,
"completions/max_terminated_length": 3989.8,
"completions/mean_length": 1118.1100830078126,
"completions/mean_terminated_length": 1140.3765380859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 275.8,
"epoch": 1.021599730003375,
"grad_norm": 0.002060416853055358,
"learning_rate": 3.695913461538462e-06,
"loss": -0.0556,
"num_tokens": 1092907503.0,
"reward": 0.9950901508331299,
"reward_std": 0.12893914580345153,
"rewards/accuracy_reward": 0.7082465291023254,
"rewards/brier_reward": 0.8094497084617615,
"rewards/confidence_uniqueness_reward": 0.9306891322135925,
"rewards/format_reward": 0.9805555701255798,
"rewards/frontier_coverage_0": 0.01462572100572288,
"rewards/frontier_coverage_1": 0.01462572100572288,
"rewards/frontier_coverage_10": 0.03155530486255884,
"rewards/frontier_coverage_15": 0.07923973947763444,
"rewards/frontier_coverage_20": 0.14191269278526306,
"rewards/frontier_coverage_25": 0.22000607550144197,
"rewards/frontier_coverage_5": 0.014647024078294634,
"rewards/frontier_entropy_batch_reward": -0.30712297558784485,
"signal/accuracy_reward/centered_abs_mean": 0.12706705778837205,
"signal/accuracy_reward/group_std_mean": 0.1701394349336624,
"signal/accuracy_reward/group_zero_std_frac": 0.5083333313465118,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9200910568237305,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06353352889418602,
"signal/advantage_abs_mean": 0.7581474661827088,
"signal/advantage_pre_scale_abs_mean": 0.09590282887220383,
"signal/advantage_pre_scale_std": 0.16368852853775023,
"signal/advantage_std": 0.9830290079116821,
"signal/brier_reward/centered_abs_mean": 0.12702373564243316,
"signal/brier_reward/group_std_mean": 0.1641145259141922,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1852224737405777,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01270237360149622,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.041591137647628784,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06271186843514442,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06073887199163437,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0041591140907257795,
"signal/format_reward/centered_abs_mean": 0.03104383647441864,
"signal/format_reward/group_std_mean": 0.04997814521193504,
"signal/format_reward/group_zero_std_frac": 0.819444453716278,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.22688681483268738,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01552191823720932,
"signal/frontier_coverage_0/centered_abs_mean": 0.15112363398075104,
"signal/frontier_coverage_0/group_std_mean": 0.1970497488975525,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03148054778575897,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021610677940770984,
"signal/frontier_coverage_1/centered_abs_mean": 0.15112363398075104,
"signal/frontier_coverage_1/group_std_mean": 0.1970497488975525,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03148054778575897,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021610677940770984,
"signal/frontier_coverage_10/centered_abs_mean": 0.06746098995208741,
"signal/frontier_coverage_10/group_std_mean": 0.08946224302053452,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014050611481070518,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009646921185776592,
"signal/frontier_coverage_15/centered_abs_mean": 0.07157327681779861,
"signal/frontier_coverage_15/group_std_mean": 0.08971850723028182,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014921391755342484,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010234977817162871,
"signal/frontier_coverage_20/centered_abs_mean": 0.10281662493944169,
"signal/frontier_coverage_20/group_std_mean": 0.12900976240634918,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021420946344733238,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014702777145430446,
"signal/frontier_coverage_25/centered_abs_mean": 0.1416828900575638,
"signal/frontier_coverage_25/group_std_mean": 0.17844592332839965,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02950539030134678,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002026065322570503,
"signal/frontier_coverage_5/centered_abs_mean": 0.1510587751865387,
"signal/frontier_coverage_5/group_std_mean": 0.19697055518627166,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03146698512136936,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021601404063403607,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33122584223747253,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3963626027107239,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.48287222981452943,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03312258496880531,
"step": 425
},
{
"calibration/aurc": 0.08296184763617191,
"calibration/batch_distribution_entropy": 0.9267126629409962,
"calibration/buffer_distribution_entropy": 0.9845992025019553,
"calibration/confidence_entropy": 0.47815104058824354,
"calibration/coverage@0%": 0.1433396054307729,
"calibration/coverage@1%": 0.2590142086053761,
"calibration/coverage@10%": 0.7602710057627717,
"calibration/coverage@15%": 0.8728334245575626,
"calibration/coverage@20%": 0.9432129173508483,
"calibration/coverage@25%": 0.9770114942528736,
"calibration/coverage@30%": 0.9994252873563217,
"calibration/coverage@5%": 0.4102373856000341,
"calibration/ece": 0.21019582274960474,
"calibration/mean_confidence": 0.6381662601107146,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015017361111111117,
"completions/max_length": 3976.8,
"completions/max_terminated_length": 3976.8,
"completions/mean_length": 1031.4753540039062,
"completions/mean_terminated_length": 1046.8734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 241.6,
"epoch": 1.03359958000525,
"grad_norm": 0.002130492590367794,
"learning_rate": 3.665865384615385e-06,
"loss": -0.0395,
"num_tokens": 1107866579.0,
"reward": 1.0310980796813964,
"reward_std": 0.11252327859401703,
"rewards/accuracy_reward": 0.7815972208976746,
"rewards/brier_reward": 0.8353811860084533,
"rewards/confidence_uniqueness_reward": 0.930880856513977,
"rewards/format_reward": 0.9849826335906983,
"rewards/frontier_coverage_0": -0.011007923632860184,
"rewards/frontier_coverage_1": -0.011007923632860184,
"rewards/frontier_coverage_10": 0.02423018105328083,
"rewards/frontier_coverage_15": 0.10173185169696808,
"rewards/frontier_coverage_20": 0.18633103370666504,
"rewards/frontier_coverage_25": 0.2875380277633667,
"rewards/frontier_coverage_5": -0.010987864434719085,
"rewards/frontier_entropy_batch_reward": -0.3692371368408203,
"signal/accuracy_reward/centered_abs_mean": 0.10270182341337204,
"signal/accuracy_reward/group_std_mean": 0.142086860537529,
"signal/accuracy_reward/group_zero_std_frac": 0.569444453716278,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8557999968528748,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05135091170668602,
"signal/advantage_abs_mean": 0.7485065340995789,
"signal/advantage_pre_scale_abs_mean": 0.08068742454051972,
"signal/advantage_pre_scale_std": 0.14736481308937072,
"signal/advantage_std": 0.9828257083892822,
"signal/brier_reward/centered_abs_mean": 0.1104082152247429,
"signal/brier_reward/group_std_mean": 0.14500466585159302,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1834684669971466,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011040821857750415,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.034445105493068694,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05594793781638145,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05692398175597191,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003444510605186224,
"signal/format_reward/centered_abs_mean": 0.022271049953997136,
"signal/format_reward/group_std_mean": 0.04136303998529911,
"signal/format_reward/group_zero_std_frac": 0.8333333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.18274498283863067,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011135524976998568,
"signal/frontier_coverage_0/centered_abs_mean": 0.13331829905509948,
"signal/frontier_coverage_0/group_std_mean": 0.17565890848636628,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.031815215945243835,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019064516527578235,
"signal/frontier_coverage_1/centered_abs_mean": 0.13331829905509948,
"signal/frontier_coverage_1/group_std_mean": 0.17565890848636628,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.031815215945243835,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019064516527578235,
"signal/frontier_coverage_10/centered_abs_mean": 0.06181113198399544,
"signal/frontier_coverage_10/group_std_mean": 0.08169252574443817,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01474505104124546,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008838991867378354,
"signal/frontier_coverage_15/centered_abs_mean": 0.07603696435689926,
"signal/frontier_coverage_15/group_std_mean": 0.09389333873987198,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018103727698326112,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010873286053538322,
"signal/frontier_coverage_20/centered_abs_mean": 0.10807138234376908,
"signal/frontier_coverage_20/group_std_mean": 0.13423685133457183,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025709601119160652,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015454208012670278,
"signal/frontier_coverage_25/centered_abs_mean": 0.14581511318683624,
"signal/frontier_coverage_25/group_std_mean": 0.18234173357486724,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.034686730802059175,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002085156296379864,
"signal/frontier_coverage_5/centered_abs_mean": 0.1332632303237915,
"signal/frontier_coverage_5/group_std_mean": 0.17558786869049073,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03180203214287758,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001905664219520986,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33877058029174806,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4033771097660065,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5624748587608337,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03387705832719803,
"step": 430
},
{
"calibration/aurc": 0.17993360872951467,
"calibration/batch_distribution_entropy": 0.956776656487045,
"calibration/buffer_distribution_entropy": 0.9847372028422502,
"calibration/confidence_entropy": 0.48640155317854983,
"calibration/coverage@0%": 0.00976978220315286,
"calibration/coverage@1%": 0.00976978220315286,
"calibration/coverage@10%": 0.47534625919883966,
"calibration/coverage@15%": 0.5970449645029289,
"calibration/coverage@20%": 0.6671838796988296,
"calibration/coverage@25%": 0.7335380912216357,
"calibration/coverage@30%": 0.8138264319625941,
"calibration/coverage@5%": 0.18928673568716697,
"calibration/ece": 0.18617977830276256,
"calibration/mean_confidence": 0.5551562162905087,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.030989583333333348,
"completions/max_length": 4045.4,
"completions/max_terminated_length": 4045.4,
"completions/mean_length": 1006.7073974609375,
"completions/mean_terminated_length": 1038.9220458984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 263.2,
"epoch": 1.045599430007125,
"grad_norm": 0.002083980944007635,
"learning_rate": 3.635817307692308e-06,
"loss": -0.0897,
"num_tokens": 1122548360.0,
"reward": 0.9809871554374695,
"reward_std": 0.14432497471570968,
"rewards/accuracy_reward": 0.6943576335906982,
"rewards/brier_reward": 0.7956489801406861,
"rewards/confidence_uniqueness_reward": 0.9197240710258484,
"rewards/format_reward": 0.9684895873069763,
"rewards/frontier_coverage_0": 0.018659752607345582,
"rewards/frontier_coverage_1": 0.018659752607345582,
"rewards/frontier_coverage_10": 0.03520567715167999,
"rewards/frontier_coverage_15": 0.08243112862110138,
"rewards/frontier_coverage_20": 0.1451416015625,
"rewards/frontier_coverage_25": 0.22207084000110627,
"rewards/frontier_coverage_5": 0.018691231869161128,
"rewards/frontier_entropy_batch_reward": -0.29708088040351865,
"signal/accuracy_reward/centered_abs_mean": 0.12945420891046525,
"signal/accuracy_reward/group_std_mean": 0.17190252244472504,
"signal/accuracy_reward/group_zero_std_frac": 0.5055555641651154,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.879289448261261,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06472710445523262,
"signal/advantage_abs_mean": 0.7449347257614136,
"signal/advantage_pre_scale_abs_mean": 0.10397075414657593,
"signal/advantage_pre_scale_std": 0.18122569024562835,
"signal/advantage_std": 0.9831098794937134,
"signal/brier_reward/centered_abs_mean": 0.13766922950744628,
"signal/brier_reward/group_std_mean": 0.17730204164981841,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18823137879371643,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013766923174262046,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05718742534518242,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09346490800380707,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.07706695050001144,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005718742869794369,
"signal/format_reward/centered_abs_mean": 0.04926757887005806,
"signal/format_reward/group_std_mean": 0.08465958237648011,
"signal/format_reward/group_zero_std_frac": 0.6777777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.32885663509368895,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.02463378943502903,
"signal/frontier_coverage_0/centered_abs_mean": 0.17320359647274017,
"signal/frontier_coverage_0/group_std_mean": 0.22111334800720214,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.033909741789102554,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024768114555627106,
"signal/frontier_coverage_1/centered_abs_mean": 0.17320359647274017,
"signal/frontier_coverage_1/group_std_mean": 0.22111334800720214,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.033909741789102554,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024768114555627106,
"signal/frontier_coverage_10/centered_abs_mean": 0.0769290342926979,
"signal/frontier_coverage_10/group_std_mean": 0.09959446638822556,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015100923553109168,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011000852100551128,
"signal/frontier_coverage_15/centered_abs_mean": 0.07145349681377411,
"signal/frontier_coverage_15/group_std_mean": 0.08837363570928573,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014147062785923481,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010217850678600371,
"signal/frontier_coverage_20/centered_abs_mean": 0.09702417999505997,
"signal/frontier_coverage_20/group_std_mean": 0.12051473706960678,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019190432131290437,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013874457916244864,
"signal/frontier_coverage_25/centered_abs_mean": 0.13127293437719345,
"signal/frontier_coverage_25/group_std_mean": 0.16414665877819062,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.025929966941475868,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018772028852254152,
"signal/frontier_coverage_5/centered_abs_mean": 0.17312270998954774,
"signal/frontier_coverage_5/group_std_mean": 0.22101564705371857,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03389389365911484,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024756547063589096,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33052846789360046,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3995340406894684,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.45733819007873533,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03305284790694714,
"step": 435
},
{
"calibration/aurc": 0.14388746882850229,
"calibration/batch_distribution_entropy": 0.9284179641062792,
"calibration/buffer_distribution_entropy": 0.9853219225343312,
"calibration/confidence_entropy": 0.49713957100109135,
"calibration/coverage@0%": 0.051848325523578695,
"calibration/coverage@1%": 0.051848325523578695,
"calibration/coverage@10%": 0.5079058909249714,
"calibration/coverage@15%": 0.7396266713421134,
"calibration/coverage@20%": 0.816502049642694,
"calibration/coverage@25%": 0.8440807137496872,
"calibration/coverage@30%": 0.866268656716418,
"calibration/coverage@5%": 0.30370166027493467,
"calibration/ece": 0.1480970565108381,
"calibration/mean_confidence": 0.5872938701596123,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.04756944444444446,
"completions/max_length": 4001.4,
"completions/max_terminated_length": 4001.4,
"completions/mean_length": 987.0814208984375,
"completions/mean_terminated_length": 1037.7956420898438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 248.2,
"epoch": 1.057599280009,
"grad_norm": 0.0020778202451765537,
"learning_rate": 3.605769230769231e-06,
"loss": -0.1183,
"num_tokens": 1137019666.0,
"reward": 0.9766067028045654,
"reward_std": 0.17658871114254,
"rewards/accuracy_reward": 0.7150173664093018,
"rewards/brier_reward": 0.7861824035644531,
"rewards/confidence_uniqueness_reward": 0.8997833251953125,
"rewards/format_reward": 0.9480902791023255,
"rewards/frontier_coverage_0": -0.008721314929425716,
"rewards/frontier_coverage_1": -0.008721314929425716,
"rewards/frontier_coverage_10": 0.024878227338194846,
"rewards/frontier_coverage_15": 0.085553839802742,
"rewards/frontier_coverage_20": 0.1561792552471161,
"rewards/frontier_coverage_25": 0.24047624468803405,
"rewards/frontier_coverage_5": -0.008641589153558015,
"rewards/frontier_entropy_batch_reward": -0.30422061681747437,
"signal/accuracy_reward/centered_abs_mean": 0.13926323801279067,
"signal/accuracy_reward/group_std_mean": 0.19407737255096436,
"signal/accuracy_reward/group_zero_std_frac": 0.4111111104488373,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7671608686447143,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06963161900639533,
"signal/advantage_abs_mean": 0.7121507525444031,
"signal/advantage_pre_scale_abs_mean": 0.1229197159409523,
"signal/advantage_pre_scale_std": 0.21047253012657166,
"signal/advantage_std": 0.9833353638648987,
"signal/brier_reward/centered_abs_mean": 0.14424349814653398,
"signal/brier_reward/group_std_mean": 0.19108721613883972,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15954833924770356,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014424350298941135,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08067524954676628,
"signal/confidence_uniqueness_reward/group_std_mean": 0.12732555568218232,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08215240314602852,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008067525085061788,
"signal/format_reward/centered_abs_mean": 0.07577040046453476,
"signal/format_reward/group_std_mean": 0.12256582081317902,
"signal/format_reward/group_zero_std_frac": 0.5611111134290695,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.3762800365686417,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.03788520023226738,
"signal/frontier_coverage_0/centered_abs_mean": 0.15879909992218016,
"signal/frontier_coverage_0/group_std_mean": 0.20415432155132293,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.025722567364573477,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022708271164447067,
"signal/frontier_coverage_1/centered_abs_mean": 0.15879909992218016,
"signal/frontier_coverage_1/group_std_mean": 0.20415432155132293,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.025722567364573477,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022708271164447067,
"signal/frontier_coverage_10/centered_abs_mean": 0.06879703104496002,
"signal/frontier_coverage_10/group_std_mean": 0.08966280072927475,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.011097630951553583,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009837975027039646,
"signal/frontier_coverage_15/centered_abs_mean": 0.07169133126735687,
"signal/frontier_coverage_15/group_std_mean": 0.08976521641016007,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011832451168447732,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010251860483549535,
"signal/frontier_coverage_20/centered_abs_mean": 0.1028002068400383,
"signal/frontier_coverage_20/group_std_mean": 0.1287078246474266,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01704120673239231,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001470042997971177,
"signal/frontier_coverage_25/centered_abs_mean": 0.14137257635593414,
"signal/frontier_coverage_25/group_std_mean": 0.17798839807510375,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.023427498526871204,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002021627686917782,
"signal/frontier_coverage_5/centered_abs_mean": 0.15861307382583617,
"signal/frontier_coverage_5/group_std_mean": 0.20392609238624573,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.025691739097237588,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022681670263409613,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33480705618858336,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3987271010875702,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3848613739013672,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03348070606589317,
"step": 440
},
{
"calibration/aurc": 0.10326447524720148,
"calibration/batch_distribution_entropy": 0.9536010778222744,
"calibration/buffer_distribution_entropy": 0.9841287707055834,
"calibration/confidence_entropy": 0.4816929863011735,
"calibration/coverage@0%": 0.05213600366138711,
"calibration/coverage@1%": 0.08546933699472044,
"calibration/coverage@10%": 0.5888607070042604,
"calibration/coverage@15%": 0.7645768229948711,
"calibration/coverage@20%": 0.8811925409547783,
"calibration/coverage@25%": 0.9414970339001931,
"calibration/coverage@30%": 0.9771117166212534,
"calibration/coverage@5%": 0.3900736698300794,
"calibration/ece": 0.14976913086003144,
"calibration/mean_confidence": 0.6172107058464944,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01597222222222223,
"completions/max_length": 3980.4,
"completions/max_terminated_length": 3980.4,
"completions/mean_length": 953.7552978515625,
"completions/mean_terminated_length": 969.312060546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 243.4,
"epoch": 1.069599130010875,
"grad_norm": 0.0019089620327576995,
"learning_rate": 3.575721153846154e-06,
"loss": -0.0578,
"num_tokens": 1151063951.0,
"reward": 1.0075167655944823,
"reward_std": 0.125567664206028,
"rewards/accuracy_reward": 0.7283854126930237,
"rewards/brier_reward": 0.8320141434669495,
"rewards/confidence_uniqueness_reward": 0.9312868356704712,
"rewards/format_reward": 0.9836805582046508,
"rewards/frontier_coverage_0": 0.01591839836910367,
"rewards/frontier_coverage_1": 0.01591839836910367,
"rewards/frontier_coverage_10": 0.03556139282882213,
"rewards/frontier_coverage_15": 0.09857904762029648,
"rewards/frontier_coverage_20": 0.17490629553794862,
"rewards/frontier_coverage_25": 0.2656311184167862,
"rewards/frontier_coverage_5": 0.015986279817298055,
"rewards/frontier_entropy_batch_reward": -0.3374809443950653,
"signal/accuracy_reward/centered_abs_mean": 0.11180012971162796,
"signal/accuracy_reward/group_std_mean": 0.15532722175121308,
"signal/accuracy_reward/group_zero_std_frac": 0.5277777791023255,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8297061920166016,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05590006485581398,
"signal/advantage_abs_mean": 0.7285216093063355,
"signal/advantage_pre_scale_abs_mean": 0.08672792613506317,
"signal/advantage_pre_scale_std": 0.1563153862953186,
"signal/advantage_std": 0.9829874873161316,
"signal/brier_reward/centered_abs_mean": 0.11765489429235458,
"signal/brier_reward/group_std_mean": 0.1569055736064911,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17583496868610382,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011765489727258683,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0403615452349186,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07109490633010865,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.060684775561094285,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0040361546911299225,
"signal/format_reward/centered_abs_mean": 0.02916666679084301,
"signal/format_reward/group_std_mean": 0.05808819979429245,
"signal/format_reward/group_zero_std_frac": 0.7527777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.21720606386661528,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.014583333395421505,
"signal/frontier_coverage_0/centered_abs_mean": 0.14205318093299865,
"signal/frontier_coverage_0/group_std_mean": 0.18557943999767304,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030138077586889266,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002031360566616058,
"signal/frontier_coverage_1/centered_abs_mean": 0.14205318093299865,
"signal/frontier_coverage_1/group_std_mean": 0.18557943999767304,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030138077586889266,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002031360566616058,
"signal/frontier_coverage_10/centered_abs_mean": 0.06313612163066865,
"signal/frontier_coverage_10/group_std_mean": 0.08182553499937058,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013453066535294055,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009028465370647609,
"signal/frontier_coverage_15/centered_abs_mean": 0.07494814544916154,
"signal/frontier_coverage_15/group_std_mean": 0.09314066916704178,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01615871414542198,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010717584751546382,
"signal/frontier_coverage_20/centered_abs_mean": 0.10805933326482772,
"signal/frontier_coverage_20/group_std_mean": 0.13530959486961364,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023342077061533927,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015452483668923378,
"signal/frontier_coverage_25/centered_abs_mean": 0.14793600142002106,
"signal/frontier_coverage_25/group_std_mean": 0.18606266975402833,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0319239042699337,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002115484792739153,
"signal/frontier_coverage_5/centered_abs_mean": 0.14187667965888978,
"signal/frontier_coverage_5/group_std_mean": 0.18535732924938203,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030100544169545173,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002028836542740464,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33231388330459594,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40017271041870117,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5018444716930389,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03323138877749443,
"step": 445
},
{
"calibration/aurc": 0.08478005706856111,
"calibration/batch_distribution_entropy": 0.9683973579606967,
"calibration/buffer_distribution_entropy": 0.9837530232424537,
"calibration/confidence_entropy": 0.48627515320497283,
"calibration/coverage@0%": 0.24325156703800324,
"calibration/coverage@1%": 0.2568285905366977,
"calibration/coverage@10%": 0.5993765100063526,
"calibration/coverage@15%": 0.8358934737999405,
"calibration/coverage@20%": 0.9019483275751551,
"calibration/coverage@25%": 0.9727829060531924,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.40373817347591984,
"calibration/ece": 0.21289906688097188,
"calibration/mean_confidence": 0.5514703829801496,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015885416666666673,
"completions/max_length": 3790.4,
"completions/max_terminated_length": 3790.4,
"completions/mean_length": 965.8823120117188,
"completions/mean_terminated_length": 981.3806762695312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 222.6,
"epoch": 1.08159898001275,
"grad_norm": 0.0022360687144100666,
"learning_rate": 3.5456730769230774e-06,
"loss": -0.0372,
"num_tokens": 1165294179.0,
"reward": 0.9963475942611695,
"reward_std": 0.12414093762636184,
"rewards/accuracy_reward": 0.6988715410232544,
"rewards/brier_reward": 0.810006046295166,
"rewards/confidence_uniqueness_reward": 0.9354828357696533,
"rewards/format_reward": 0.98359375,
"rewards/frontier_coverage_0": 0.028983466140925885,
"rewards/frontier_coverage_1": 0.028983466140925885,
"rewards/frontier_coverage_10": 0.03974376879632473,
"rewards/frontier_coverage_15": 0.08501660823822021,
"rewards/frontier_coverage_20": 0.1462234228849411,
"rewards/frontier_coverage_25": 0.22431569099426268,
"rewards/frontier_coverage_5": 0.028991557843983173,
"rewards/frontier_entropy_batch_reward": -0.27760238349437716,
"signal/accuracy_reward/centered_abs_mean": 0.1218912735581398,
"signal/accuracy_reward/group_std_mean": 0.16463150084018707,
"signal/accuracy_reward/group_zero_std_frac": 0.5083333432674408,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8862983822822571,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0609456367790699,
"signal/advantage_abs_mean": 0.7339087724685669,
"signal/advantage_pre_scale_abs_mean": 0.08842306286096573,
"signal/advantage_pre_scale_std": 0.15372338891029358,
"signal/advantage_std": 0.9830293416976928,
"signal/brier_reward/centered_abs_mean": 0.13105346411466598,
"signal/brier_reward/group_std_mean": 0.17164418697357178,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19119353890419005,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013105347007513046,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03763534687459469,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06680730283260346,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.055056449770927426,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037635347805917265,
"signal/format_reward/centered_abs_mean": 0.02844509556889534,
"signal/format_reward/group_std_mean": 0.056251946836709976,
"signal/format_reward/group_zero_std_frac": 0.7583333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.20870205760002136,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01422254778444767,
"signal/frontier_coverage_0/centered_abs_mean": 0.18482233881950377,
"signal/frontier_coverage_0/group_std_mean": 0.23658272325992585,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.038531605154275894,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002642959402874112,
"signal/frontier_coverage_1/centered_abs_mean": 0.18482233881950377,
"signal/frontier_coverage_1/group_std_mean": 0.23658272325992585,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.038531605154275894,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002642959402874112,
"signal/frontier_coverage_10/centered_abs_mean": 0.07772842198610305,
"signal/frontier_coverage_10/group_std_mean": 0.1001784086227417,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.016198632307350636,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001111516449600458,
"signal/frontier_coverage_15/centered_abs_mean": 0.06999329477548599,
"signal/frontier_coverage_15/group_std_mean": 0.0872062012553215,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014550425298511981,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010009041405282914,
"signal/frontier_coverage_20/centered_abs_mean": 0.09316149204969407,
"signal/frontier_coverage_20/group_std_mean": 0.11618510782718658,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019368303567171098,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001332209282554686,
"signal/frontier_coverage_25/centered_abs_mean": 0.1268085092306137,
"signal/frontier_coverage_25/group_std_mean": 0.15882777273654938,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.026379085332155227,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001813361677341163,
"signal/frontier_coverage_5/centered_abs_mean": 0.18448520302772523,
"signal/frontier_coverage_5/group_std_mean": 0.23615312576293945,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03846092000603676,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026381383650004864,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3260856032371521,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3959659218788147,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.47359164953231814,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03260856196284294,
"step": 450
},
{
"epoch": 1.08159898001275,
"eval_calibration/aurc": 0.13309575401983562,
"eval_calibration/batch_distribution_entropy": 0.936133368392568,
"eval_calibration/buffer_distribution_entropy": 0.9838314799658758,
"eval_calibration/confidence_entropy": 0.4708227347238358,
"eval_calibration/coverage@0%": 0.26797715053763443,
"eval_calibration/coverage@1%": 0.26797715053763443,
"eval_calibration/coverage@10%": 0.5540994623655914,
"eval_calibration/coverage@15%": 0.6597782258064516,
"eval_calibration/coverage@20%": 0.7594086021505376,
"eval_calibration/coverage@25%": 0.9164986559139785,
"eval_calibration/coverage@30%": 0.9375,
"eval_calibration/coverage@5%": 0.3323252688172043,
"eval_calibration/ece": 0.21734695889336916,
"eval_calibration/mean_confidence": 0.5601244476926523,
"eval_completions/clipped_ratio": 0.005208333333333333,
"eval_completions/max_length": 3039.8333333333335,
"eval_completions/max_terminated_length": 3039.8333333333335,
"eval_completions/mean_length": 954.2652994791666,
"eval_completions/mean_terminated_length": 959.2304890950521,
"eval_completions/min_length": 99.5,
"eval_completions/min_terminated_length": 270.5,
"eval_loss": 0.0,
"eval_num_tokens": 1165294179.0,
"eval_reward": 0.9161022206147512,
"eval_reward_std": 0.2383043939868609,
"eval_rewards/accuracy_reward": 0.6814236044883728,
"eval_rewards/brier_reward": 0.8101389706134796,
"eval_rewards/confidence_uniqueness_reward": 0.8908315300941467,
"eval_rewards/format_reward": 0.9921875,
"eval_rewards/frontier_coverage_0": 0.03702201593356828,
"eval_rewards/frontier_coverage_1": 0.03702201593356828,
"eval_rewards/frontier_coverage_10": 0.04230095911771059,
"eval_rewards/frontier_coverage_15": 0.08261789381504059,
"eval_rewards/frontier_coverage_20": 0.14037525778015456,
"eval_rewards/frontier_coverage_25": 0.21233193079630533,
"eval_rewards/frontier_coverage_5": 0.037026698933914304,
"eval_rewards/frontier_entropy_batch_reward": -0.9921875,
"eval_runtime": 204.9493,
"eval_samples_per_second": 4.879,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4199761301279068,
"eval_signal/accuracy_reward/group_std_mean": 0.4641881287097931,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8903038104375204,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2099880650639534,
"eval_signal/advantage_abs_mean": 0.869709312915802,
"eval_signal/advantage_pre_scale_abs_mean": 0.20785571883122125,
"eval_signal/advantage_pre_scale_std": 0.23637428879737854,
"eval_signal/advantage_std": 0.9864057501157125,
"eval_signal/brier_reward/centered_abs_mean": 0.18304560085137686,
"eval_signal/brier_reward/group_std_mean": 0.23924180368582407,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07741126045584679,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01830456079915166,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.047562570621569954,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07460235804319382,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020098049348841112,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004756257132006188,
"eval_signal/format_reward/centered_abs_mean": 0.015136718439559141,
"eval_signal/format_reward/group_std_mean": 0.044194173688689865,
"eval_signal/format_reward/group_zero_std_frac": 0.750000019868215,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.03172660774240891,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359219779571,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.2983556042114894,
"eval_signal/frontier_coverage_0/group_std_mean": 0.4108336369196574,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.018101781296233337,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004266485145005087,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.2983556042114894,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4108336369196574,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.018101781296233337,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004266485145005087,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.10858400911092758,
"eval_signal/frontier_coverage_10/group_std_mean": 0.1563408076763153,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006582974921911955,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015527513654281695,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.09981824830174446,
"eval_signal/frontier_coverage_15/group_std_mean": 0.12792696679631868,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0060469558617721,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014274009154178202,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.1740375558535258,
"eval_signal/frontier_coverage_20/group_std_mean": 0.21623691419760385,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.010545457247644663,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024887369945645332,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2637837479511897,
"eval_signal/frontier_coverage_25/group_std_mean": 0.3216549704472224,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015982618710647028,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003772107457431654,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2975670297940572,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4098781496286392,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01805388368666172,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0042552082644154625,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.015136718439559141,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.044194173688689865,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.750000019868215,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.006345321889966726,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0015136719642517467,
"eval_steps_per_second": 0.029,
"step": 450
},
{
"epoch": 1.08159898001275,
"step": 450,
"train_probe_calibration/aurc": 0.09654250782712287,
"train_probe_calibration/batch_distribution_entropy": 0.9183525716825388,
"train_probe_calibration/buffer_distribution_entropy": 0.9837598538858333,
"train_probe_calibration/confidence_entropy": 0.4956307539442461,
"train_probe_calibration/coverage@0%": 0.3541666666666667,
"train_probe_calibration/coverage@1%": 0.3541666666666667,
"train_probe_calibration/coverage@10%": 0.671875,
"train_probe_calibration/coverage@15%": 0.7864583333333334,
"train_probe_calibration/coverage@20%": 0.8541666666666666,
"train_probe_calibration/coverage@25%": 0.9114583333333334,
"train_probe_calibration/coverage@30%": 0.9583333333333334,
"train_probe_calibration/coverage@5%": 0.3958333333333333,
"train_probe_calibration/ece": 0.23860572916666667,
"train_probe_calibration/mean_confidence": 0.5797567708333333,
"train_probe_completions/clipped_ratio": 0.004340277777777772,
"train_probe_completions/max_length": 2840.3333333333335,
"train_probe_completions/max_terminated_length": 2840.3333333333335,
"train_probe_completions/mean_length": 917.0496826171875,
"train_probe_completions/mean_terminated_length": 921.0797424316406,
"train_probe_completions/min_length": 113.16666666666667,
"train_probe_completions/min_terminated_length": 202.16666666666666,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 1165294179.0,
"train_probe_reward": 0.9429791967074076,
"train_probe_reward_std": 0.22729473561048508,
"train_probe_rewards/accuracy_reward": 0.7274305621782938,
"train_probe_rewards/brier_reward": 0.8345652719338735,
"train_probe_rewards/confidence_uniqueness_reward": 0.8907654881477356,
"train_probe_rewards/format_reward": 0.9930555522441864,
"train_probe_rewards/frontier_coverage_0": 0.02799505041912198,
"train_probe_rewards/frontier_coverage_1": 0.02799505041912198,
"train_probe_rewards/frontier_coverage_10": 0.04346960255255302,
"train_probe_rewards/frontier_coverage_15": 0.10106463233629863,
"train_probe_rewards/frontier_coverage_20": 0.17408683399359384,
"train_probe_rewards/frontier_coverage_25": 0.2622348219156265,
"train_probe_rewards/frontier_coverage_5": 0.02808955203120907,
"train_probe_rewards/frontier_entropy_batch_reward": -0.9930555522441864,
"train_probe_runtime": 190.26,
"train_probe_samples_per_second": 5.256,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.3901909738779068,
"train_probe_signal/accuracy_reward/group_std_mean": 0.44776545961697894,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8720936874548594,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.1950954869389534,
"train_probe_signal/advantage_abs_mean": 0.8436907827854156,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.1921898971001307,
"train_probe_signal/advantage_pre_scale_std": 0.22606053948402405,
"train_probe_signal/advantage_std": 0.9863827129205068,
"train_probe_signal/brier_reward/centered_abs_mean": 0.1621926079193751,
"train_probe_signal/brier_reward/group_std_mean": 0.21996241311232248,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07256464473903179,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.016219260947157938,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04783617208401362,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.07148952161272366,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021304875301818054,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004783617487798135,
"train_probe_signal/format_reward/centered_abs_mean": 0.013346354011446238,
"train_probe_signal/format_reward/group_std_mean": 0.0362943010404706,
"train_probe_signal/format_reward/group_zero_std_frac": 0.8055555721124014,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.029005679301917553,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.006673177005723119,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.2803582151730855,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.3964219441016515,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.017947336037953694,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004009122572218378,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2803582151730855,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.3964219441016515,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.017947336037953694,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004009122572218378,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.10028981169064839,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.1465649058421453,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006421741874267657,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001434144234129538,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.1032271757721901,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.12874728937943777,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0066018542274832726,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014761485702668626,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.17853716760873795,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.21422516802946726,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.011413250584155321,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025530814891681075,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.26261725028355914,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.3138073782126109,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01678881049156189,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037554265776028237,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.2796053687731425,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.3954645246267319,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01789912985016902,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00399835667728136,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.013346354011446238,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.0362943010404706,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8055555721124014,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.005801136062170069,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0013346354438302417,
"train_probe_steps_per_second": 0.032
},
{
"calibration/aurc": 0.1786189017935566,
"calibration/batch_distribution_entropy": 0.9565413659875921,
"calibration/buffer_distribution_entropy": 0.9837801817834313,
"calibration/confidence_entropy": 0.47072229513495667,
"calibration/coverage@0%": 0.03604247404982252,
"calibration/coverage@1%": 0.09677545834301624,
"calibration/coverage@10%": 0.4448384934657499,
"calibration/coverage@15%": 0.501737361819292,
"calibration/coverage@20%": 0.590501130051627,
"calibration/coverage@25%": 0.6458115752828546,
"calibration/coverage@30%": 0.8118051022628372,
"calibration/coverage@5%": 0.22979792780200406,
"calibration/ece": 0.13804144620935982,
"calibration/mean_confidence": 0.5963494289340041,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003038194444444464,
"completions/max_length": 3849.0,
"completions/max_terminated_length": 3849.0,
"completions/mean_length": 941.025,
"completions/mean_terminated_length": 944.0136962890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 184.0,
"epoch": 1.0935988300146249,
"grad_norm": 0.0026187740731984377,
"learning_rate": 3.5156250000000003e-06,
"loss": -0.0019,
"num_tokens": 1179246659.0,
"reward": 1.0269762516021728,
"reward_std": 0.11533690541982651,
"rewards/accuracy_reward": 0.7481770753860474,
"rewards/brier_reward": 0.8202757239341736,
"rewards/confidence_uniqueness_reward": 0.9461743950843811,
"rewards/format_reward": 0.9964409708976746,
"rewards/frontier_coverage_0": -0.004797273135045544,
"rewards/frontier_coverage_1": -0.004797273135045544,
"rewards/frontier_coverage_10": 0.034060157090425494,
"rewards/frontier_coverage_15": 0.09921480715274811,
"rewards/frontier_coverage_20": 0.17450326085090637,
"rewards/frontier_coverage_25": 0.2625850081443787,
"rewards/frontier_coverage_5": -0.0045932690671179445,
"rewards/frontier_entropy_batch_reward": -0.29931144416332245,
"signal/accuracy_reward/centered_abs_mean": 0.140565325319767,
"signal/accuracy_reward/group_std_mean": 0.18419778048992158,
"signal/accuracy_reward/group_zero_std_frac": 0.4777777850627899,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0589641332626343,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0702826626598835,
"signal/advantage_abs_mean": 0.7629113554954529,
"signal/advantage_pre_scale_abs_mean": 0.08662729263305664,
"signal/advantage_pre_scale_std": 0.13903791308403016,
"signal/advantage_std": 0.9829711079597473,
"signal/brier_reward/centered_abs_mean": 0.1235265538096428,
"signal/brier_reward/group_std_mean": 0.1592309892177582,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18637319803237914,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.0123526556417346,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02009183168411255,
"signal/confidence_uniqueness_reward/group_std_mean": 0.033618181198835376,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030547019839286805,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020091832615435124,
"signal/format_reward/centered_abs_mean": 0.006787109328433872,
"signal/format_reward/group_std_mean": 0.01738979984074831,
"signal/format_reward/group_zero_std_frac": 0.9111111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05055982656776905,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.003393554664216936,
"signal/frontier_coverage_0/centered_abs_mean": 0.17253205180168152,
"signal/frontier_coverage_0/group_std_mean": 0.22136968672275542,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03716257512569428,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002467208169400692,
"signal/frontier_coverage_1/centered_abs_mean": 0.17253205180168152,
"signal/frontier_coverage_1/group_std_mean": 0.22136968672275542,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03716257512569428,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002467208169400692,
"signal/frontier_coverage_10/centered_abs_mean": 0.07255163341760636,
"signal/frontier_coverage_10/group_std_mean": 0.09273735284805298,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01566624455153942,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010374883189797402,
"signal/frontier_coverage_15/centered_abs_mean": 0.07906774580478668,
"signal/frontier_coverage_15/group_std_mean": 0.09843083024024964,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017194531671702862,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011306687723845244,
"signal/frontier_coverage_20/centered_abs_mean": 0.11041324287652969,
"signal/frontier_coverage_20/group_std_mean": 0.13848029375076293,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024011900275945665,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00157890934497118,
"signal/frontier_coverage_25/centered_abs_mean": 0.15008221864700316,
"signal/frontier_coverage_25/group_std_mean": 0.18917769193649292,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03261282928287983,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021461757365614175,
"signal/frontier_coverage_5/centered_abs_mean": 0.17207085490226745,
"signal/frontier_coverage_5/group_std_mean": 0.22079600393772125,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03706258684396744,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002460613241419196,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3263821184635162,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3926126003265381,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.49657478332519533,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0326382115483284,
"step": 455
},
{
"calibration/aurc": 0.13182774360565988,
"calibration/batch_distribution_entropy": 0.9525344528071358,
"calibration/buffer_distribution_entropy": 0.9834768704905992,
"calibration/confidence_entropy": 0.5074257563517042,
"calibration/coverage@0%": 0.03441579634464752,
"calibration/coverage@1%": 0.03441579634464752,
"calibration/coverage@10%": 0.4196420800696258,
"calibration/coverage@15%": 0.6088677654482157,
"calibration/coverage@20%": 0.8418706483899042,
"calibration/coverage@25%": 0.922715404699739,
"calibration/coverage@30%": 0.9577023498694517,
"calibration/coverage@5%": 0.2622307441253264,
"calibration/ece": 0.14650989358297242,
"calibration/mean_confidence": 0.5997194100397429,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.001388888888888906,
"completions/max_length": 3680.8,
"completions/max_terminated_length": 3680.8,
"completions/mean_length": 817.7682495117188,
"completions/mean_terminated_length": 818.874755859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 154.4,
"epoch": 1.1055986800164999,
"grad_norm": 0.0029210939537733793,
"learning_rate": 3.4855769230769233e-06,
"loss": 0.0012,
"num_tokens": 1191737141.0,
"reward": 1.0027261614799499,
"reward_std": 0.10649179220199585,
"rewards/accuracy_reward": 0.69921875,
"rewards/brier_reward": 0.8265803694725037,
"rewards/confidence_uniqueness_reward": 0.946752381324768,
"rewards/format_reward": 0.998524296283722,
"rewards/frontier_coverage_0": 0.02881563175469637,
"rewards/frontier_coverage_1": 0.02881563175469637,
"rewards/frontier_coverage_10": 0.04026442915201187,
"rewards/frontier_coverage_15": 0.09344350546598434,
"rewards/frontier_coverage_20": 0.16026363670825958,
"rewards/frontier_coverage_25": 0.23922086954116822,
"rewards/frontier_coverage_5": 0.028822965174913406,
"rewards/frontier_entropy_batch_reward": -0.32339624464511874,
"signal/accuracy_reward/centered_abs_mean": 0.13109266459941865,
"signal/accuracy_reward/group_std_mean": 0.17193427979946135,
"signal/accuracy_reward/group_zero_std_frac": 0.5111111164093017,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0414057493209838,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06554633229970933,
"signal/advantage_abs_mean": 0.7716199159622192,
"signal/advantage_pre_scale_abs_mean": 0.08222170770168305,
"signal/advantage_pre_scale_std": 0.12968189120292664,
"signal/advantage_std": 0.9828980565071106,
"signal/brier_reward/centered_abs_mean": 0.11129124760627747,
"signal/brier_reward/group_std_mean": 0.14414749443531036,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17665610015392302,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011129124835133553,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.017181032337248325,
"signal/confidence_uniqueness_reward/group_std_mean": 0.024586594104766844,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027339120209217072,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017181032570078969,
"signal/format_reward/centered_abs_mean": 0.002739800279960036,
"signal/format_reward/group_std_mean": 0.006618343479931355,
"signal/format_reward/group_zero_std_frac": 0.9666666626930237,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.020953606348484755,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001369900139980018,
"signal/frontier_coverage_0/centered_abs_mean": 0.15755559802055358,
"signal/frontier_coverage_0/group_std_mean": 0.20323067009449006,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035665206611156464,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002253045095130801,
"signal/frontier_coverage_1/centered_abs_mean": 0.15755559802055358,
"signal/frontier_coverage_1/group_std_mean": 0.20323067009449006,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035665206611156464,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002253045095130801,
"signal/frontier_coverage_10/centered_abs_mean": 0.06791285276412964,
"signal/frontier_coverage_10/group_std_mean": 0.08749785423278808,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015421891212463379,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009711537742987276,
"signal/frontier_coverage_15/centered_abs_mean": 0.07548021227121353,
"signal/frontier_coverage_15/group_std_mean": 0.09430369436740875,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017251455783843996,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001079367089550942,
"signal/frontier_coverage_20/centered_abs_mean": 0.10724329799413682,
"signal/frontier_coverage_20/group_std_mean": 0.13464588522911072,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024510875716805457,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015335792209953069,
"signal/frontier_coverage_25/centered_abs_mean": 0.14685104191303253,
"signal/frontier_coverage_25/group_std_mean": 0.18480223715305327,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03354732654988766,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020999698666855694,
"signal/frontier_coverage_5/centered_abs_mean": 0.1571534216403961,
"signal/frontier_coverage_5/group_std_mean": 0.2027212381362915,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035574134439229965,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022472939221188427,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3332145571708679,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40059667229652407,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5312770664691925,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03332145735621452,
"step": 460
},
{
"calibration/aurc": 0.11050655459715138,
"calibration/batch_distribution_entropy": 0.9616996588856601,
"calibration/buffer_distribution_entropy": 0.9835301818257198,
"calibration/confidence_entropy": 0.4803941482968095,
"calibration/coverage@0%": 0.06412097476066145,
"calibration/coverage@1%": 0.06412097476066145,
"calibration/coverage@10%": 0.5430415034812881,
"calibration/coverage@15%": 0.781678361618799,
"calibration/coverage@20%": 0.8921303851174935,
"calibration/coverage@25%": 0.952581048738033,
"calibration/coverage@30%": 0.98125,
"calibration/coverage@5%": 0.22615181679721497,
"calibration/ece": 0.19289299100357954,
"calibration/mean_confidence": 0.5811207258908004,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0015625,
"completions/max_length": 3707.6,
"completions/max_terminated_length": 3707.6,
"completions/mean_length": 763.6137451171875,
"completions/mean_terminated_length": 764.8095703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 146.2,
"epoch": 1.1175985300183748,
"grad_norm": 0.0028260101098567247,
"learning_rate": 3.4555288461538466e-06,
"loss": 0.0146,
"num_tokens": 1203618675.0,
"reward": 1.0120792388916016,
"reward_std": 0.10953928977251053,
"rewards/accuracy_reward": 0.7052951455116272,
"rewards/brier_reward": 0.8180999517440796,
"rewards/confidence_uniqueness_reward": 0.9513657927513123,
"rewards/format_reward": 0.9983507037162781,
"rewards/frontier_coverage_0": 0.026010525721358136,
"rewards/frontier_coverage_1": 0.026010525721358136,
"rewards/frontier_coverage_10": 0.039709169417619705,
"rewards/frontier_coverage_15": 0.08716795891523361,
"rewards/frontier_coverage_20": 0.14967485666275024,
"rewards/frontier_coverage_25": 0.2271842062473297,
"rewards/frontier_coverage_5": 0.02602526988484897,
"rewards/frontier_entropy_batch_reward": -0.2500975012779236,
"signal/accuracy_reward/centered_abs_mean": 0.14068467617034913,
"signal/accuracy_reward/group_std_mean": 0.18849452435970307,
"signal/accuracy_reward/group_zero_std_frac": 0.4611111104488373,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0389271974563599,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07034233808517457,
"signal/advantage_abs_mean": 0.75585458278656,
"signal/advantage_pre_scale_abs_mean": 0.08214503675699233,
"signal/advantage_pre_scale_std": 0.12915118932723998,
"signal/advantage_std": 0.9830074667930603,
"signal/brier_reward/centered_abs_mean": 0.12185637354850769,
"signal/brier_reward/group_std_mean": 0.15803824067115785,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1799175798892975,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012185638025403022,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015415123663842678,
"signal/confidence_uniqueness_reward/group_std_mean": 0.023467693105340005,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022776027396321296,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015415124129503966,
"signal/format_reward/centered_abs_mean": 0.003152126632630825,
"signal/format_reward/group_std_mean": 0.008380424790084362,
"signal/format_reward/group_zero_std_frac": 0.955555546283722,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.023016710579395295,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0015760633163154126,
"signal/frontier_coverage_0/centered_abs_mean": 0.18920553624629974,
"signal/frontier_coverage_0/group_std_mean": 0.24695340692996978,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0399616576731205,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027056390419602392,
"signal/frontier_coverage_1/centered_abs_mean": 0.18920553624629974,
"signal/frontier_coverage_1/group_std_mean": 0.24695340692996978,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0399616576731205,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027056390419602392,
"signal/frontier_coverage_10/centered_abs_mean": 0.07501264661550522,
"signal/frontier_coverage_10/group_std_mean": 0.09830510169267655,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01587317083030939,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010726808570325375,
"signal/frontier_coverage_15/centered_abs_mean": 0.07189750969409943,
"signal/frontier_coverage_15/group_std_mean": 0.09009484201669693,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015242060646414757,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001028134406078607,
"signal/frontier_coverage_20/centered_abs_mean": 0.09858875572681428,
"signal/frontier_coverage_20/group_std_mean": 0.12397728711366654,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020897452905774116,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014098191168159246,
"signal/frontier_coverage_25/centered_abs_mean": 0.13496056348085403,
"signal/frontier_coverage_25/group_std_mean": 0.17078811824321746,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02859700210392475,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019299360923469067,
"signal/frontier_coverage_5/centered_abs_mean": 0.1886357218027115,
"signal/frontier_coverage_5/group_std_mean": 0.24621900022029877,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03984150066971779,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026974908541888,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31103195548057555,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38082465529441833,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4604890525341034,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031103195250034334,
"step": 465
},
{
"calibration/aurc": 0.08517079158054916,
"calibration/batch_distribution_entropy": 0.9373243595334273,
"calibration/buffer_distribution_entropy": 0.9824645940877273,
"calibration/confidence_entropy": 0.46250007206138244,
"calibration/coverage@0%": 0.07691184312584835,
"calibration/coverage@1%": 0.154516009792515,
"calibration/coverage@10%": 0.6831409803595667,
"calibration/coverage@15%": 0.7912037037037039,
"calibration/coverage@20%": 0.8361937830687831,
"calibration/coverage@25%": 0.9244378306878307,
"calibration/coverage@30%": 0.9708333333333332,
"calibration/coverage@5%": 0.577076970622455,
"calibration/ece": 0.18084958862304013,
"calibration/mean_confidence": 0.5971060114193779,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.002951388888888862,
"completions/max_length": 3980.8,
"completions/max_terminated_length": 3980.8,
"completions/mean_length": 772.4866333007812,
"completions/mean_terminated_length": 774.7864501953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 156.6,
"epoch": 1.1295983800202498,
"grad_norm": 0.0026523873675614595,
"learning_rate": 3.4254807692307695e-06,
"loss": -0.0018,
"num_tokens": 1215594809.0,
"reward": 1.0093950271606444,
"reward_std": 0.10678299516439438,
"rewards/accuracy_reward": 0.715624988079071,
"rewards/brier_reward": 0.820962381362915,
"rewards/confidence_uniqueness_reward": 0.9453279972076416,
"rewards/format_reward": 0.9969618082046509,
"rewards/frontier_coverage_0": 0.017951905727386475,
"rewards/frontier_coverage_1": 0.017951905727386475,
"rewards/frontier_coverage_10": 0.03737656474113464,
"rewards/frontier_coverage_15": 0.09893600046634674,
"rewards/frontier_coverage_20": 0.17084673941135406,
"rewards/frontier_coverage_25": 0.25529789328575136,
"rewards/frontier_coverage_5": 0.018055624887347223,
"rewards/frontier_entropy_batch_reward": -0.32342151999473573,
"signal/accuracy_reward/centered_abs_mean": 0.12125650942325591,
"signal/accuracy_reward/group_std_mean": 0.16661165952682494,
"signal/accuracy_reward/group_zero_std_frac": 0.5027777969837188,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9284161925315857,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06062825471162796,
"signal/advantage_abs_mean": 0.755516505241394,
"signal/advantage_pre_scale_abs_mean": 0.07899446189403533,
"signal/advantage_pre_scale_std": 0.12964089810848237,
"signal/advantage_std": 0.9829226613044739,
"signal/brier_reward/centered_abs_mean": 0.12145691961050034,
"signal/brier_reward/group_std_mean": 0.15792261064052582,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.189512637257576,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012145692296326161,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019829026609659194,
"signal/confidence_uniqueness_reward/group_std_mean": 0.029715277999639512,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03138362094759941,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019829027354717256,
"signal/format_reward/centered_abs_mean": 0.005626085074618458,
"signal/format_reward/group_std_mean": 0.012826384603977203,
"signal/format_reward/group_zero_std_frac": 0.9388888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.045294156298041344,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.002813042537309229,
"signal/frontier_coverage_0/centered_abs_mean": 0.161966073513031,
"signal/frontier_coverage_0/group_std_mean": 0.21199508607387543,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036105792969465256,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00231611467897892,
"signal/frontier_coverage_1/centered_abs_mean": 0.161966073513031,
"signal/frontier_coverage_1/group_std_mean": 0.21199508607387543,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036105792969465256,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00231611467897892,
"signal/frontier_coverage_10/centered_abs_mean": 0.06864747554063796,
"signal/frontier_coverage_10/group_std_mean": 0.08801006525754929,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015342991799116135,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009816589066758753,
"signal/frontier_coverage_15/centered_abs_mean": 0.08157427757978439,
"signal/frontier_coverage_15/group_std_mean": 0.10169614106416702,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018237525969743727,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001166512188501656,
"signal/frontier_coverage_20/centered_abs_mean": 0.1129850059747696,
"signal/frontier_coverage_20/group_std_mean": 0.142295703291893,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025199725478887557,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016156855970621109,
"signal/frontier_coverage_25/centered_abs_mean": 0.1523255378007889,
"signal/frontier_coverage_25/group_std_mean": 0.193336421251297,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0339178204536438,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021782551892101766,
"signal/frontier_coverage_5/centered_abs_mean": 0.16131708920001983,
"signal/frontier_coverage_5/group_std_mean": 0.2111732006072998,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0359614685177803,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002306834328919649,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3393139183521271,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40554860830307005,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5308336019515991,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033931391686201094,
"step": 470
},
{
"calibration/aurc": 0.12207177828518079,
"calibration/batch_distribution_entropy": 0.9656969774722203,
"calibration/buffer_distribution_entropy": 0.9819994989830988,
"calibration/confidence_entropy": 0.4727467589110737,
"calibration/coverage@0%": 0.12662435359506646,
"calibration/coverage@1%": 0.16504585897141055,
"calibration/coverage@10%": 0.5823363220828475,
"calibration/coverage@15%": 0.6966582552676553,
"calibration/coverage@20%": 0.8079826809515552,
"calibration/coverage@25%": 0.8561049324882888,
"calibration/coverage@30%": 0.8983227702764767,
"calibration/coverage@5%": 0.33990680647612914,
"calibration/ece": 0.16863616885602545,
"calibration/mean_confidence": 0.5226219328072567,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0071180555555555355,
"completions/max_length": 4025.6,
"completions/max_terminated_length": 4025.6,
"completions/mean_length": 857.51484375,
"completions/mean_terminated_length": 863.591796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 159.8,
"epoch": 1.1415982300221248,
"grad_norm": 0.0022798883728682995,
"learning_rate": 3.3954326923076925e-06,
"loss": -0.0073,
"num_tokens": 1228563108.0,
"reward": 1.0127497553825378,
"reward_std": 0.11343308985233307,
"rewards/accuracy_reward": 0.7190104126930237,
"rewards/brier_reward": 0.8156786322593689,
"rewards/confidence_uniqueness_reward": 0.9434163570404053,
"rewards/format_reward": 0.9927083134651185,
"rewards/frontier_coverage_0": 0.020774408336728813,
"rewards/frontier_coverage_1": 0.020774408336728813,
"rewards/frontier_coverage_10": 0.04323679804801941,
"rewards/frontier_coverage_15": 0.09686812907457351,
"rewards/frontier_coverage_20": 0.16587933003902436,
"rewards/frontier_coverage_25": 0.24957230389118196,
"rewards/frontier_coverage_5": 0.020920474920421837,
"rewards/frontier_entropy_batch_reward": -0.2785690575838089,
"signal/accuracy_reward/centered_abs_mean": 0.13173285275697708,
"signal/accuracy_reward/group_std_mean": 0.17883577346801757,
"signal/accuracy_reward/group_zero_std_frac": 0.4694444537162781,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9889632225036621,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06586642637848854,
"signal/advantage_abs_mean": 0.7479575753211976,
"signal/advantage_pre_scale_abs_mean": 0.08370956778526306,
"signal/advantage_pre_scale_std": 0.14035601019859315,
"signal/advantage_std": 0.9829661011695862,
"signal/brier_reward/centered_abs_mean": 0.12453215271234512,
"signal/brier_reward/group_std_mean": 0.16238663494586944,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1885141134262085,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012453215941786767,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025474615022540094,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03968836776912212,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03929465599358082,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002547461586073041,
"signal/format_reward/centered_abs_mean": 0.012565104104578496,
"signal/format_reward/group_std_mean": 0.024067432433366776,
"signal/format_reward/group_zero_std_frac": 0.9000000119209289,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0986025169491768,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006282552052289248,
"signal/frontier_coverage_0/centered_abs_mean": 0.1802436351776123,
"signal/frontier_coverage_0/group_std_mean": 0.2356257289648056,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03904039040207863,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025774840265512466,
"signal/frontier_coverage_1/centered_abs_mean": 0.1802436351776123,
"signal/frontier_coverage_1/group_std_mean": 0.2356257289648056,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03904039040207863,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025774840265512466,
"signal/frontier_coverage_10/centered_abs_mean": 0.0703365221619606,
"signal/frontier_coverage_10/group_std_mean": 0.09053354859352111,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015251378715038299,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010058123152703047,
"signal/frontier_coverage_15/centered_abs_mean": 0.07596449106931687,
"signal/frontier_coverage_15/group_std_mean": 0.0946640431880951,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01646037306636572,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010862921830266715,
"signal/frontier_coverage_20/centered_abs_mean": 0.10371304005384445,
"signal/frontier_coverage_20/group_std_mean": 0.13013996928930283,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02243281565606594,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014830964850261808,
"signal/frontier_coverage_25/centered_abs_mean": 0.1406008318066597,
"signal/frontier_coverage_25/group_std_mean": 0.17746139764785768,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030378331989049913,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020105918403714894,
"signal/frontier_coverage_5/centered_abs_mean": 0.1796477258205414,
"signal/frontier_coverage_5/group_std_mean": 0.23484897315502168,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03891072869300842,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00256896261125803,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32625203132629393,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39263423085212706,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.49508474469184877,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03262520357966423,
"step": 475
},
{
"calibration/aurc": 0.11996723751576499,
"calibration/batch_distribution_entropy": 0.9502936109742859,
"calibration/buffer_distribution_entropy": 0.9814678499485566,
"calibration/confidence_entropy": 0.5001467013815555,
"calibration/coverage@0%": 0.11838897543173983,
"calibration/coverage@1%": 0.12100677647886027,
"calibration/coverage@10%": 0.47935503126530066,
"calibration/coverage@15%": 0.6854775724880053,
"calibration/coverage@20%": 0.7944413968719853,
"calibration/coverage@25%": 0.8744504925528744,
"calibration/coverage@30%": 0.9459050474414429,
"calibration/coverage@5%": 0.3548078656381403,
"calibration/ece": 0.1331829238516709,
"calibration/mean_confidence": 0.6054101633020542,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.005468750000000022,
"completions/max_length": 3896.8,
"completions/max_terminated_length": 3896.8,
"completions/mean_length": 770.0582641601562,
"completions/mean_terminated_length": 774.343505859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 154.0,
"epoch": 1.1535980800239998,
"grad_norm": 0.002632453106343746,
"learning_rate": 3.365384615384616e-06,
"loss": -0.0152,
"num_tokens": 1240526755.0,
"reward": 1.0191356778144836,
"reward_std": 0.10840296298265457,
"rewards/accuracy_reward": 0.7357638955116272,
"rewards/brier_reward": 0.8275517463684082,
"rewards/confidence_uniqueness_reward": 0.9431678771972656,
"rewards/format_reward": 0.994531261920929,
"rewards/frontier_coverage_0": 0.009372111305128782,
"rewards/frontier_coverage_1": 0.009372111305128782,
"rewards/frontier_coverage_10": 0.03894369155168533,
"rewards/frontier_coverage_15": 0.10267277508974075,
"rewards/frontier_coverage_20": 0.1778305560350418,
"rewards/frontier_coverage_25": 0.2657860189676285,
"rewards/frontier_coverage_5": 0.009487632277887315,
"rewards/frontier_entropy_batch_reward": -0.31856379210948943,
"signal/accuracy_reward/centered_abs_mean": 0.11809895783662797,
"signal/accuracy_reward/group_std_mean": 0.1618511974811554,
"signal/accuracy_reward/group_zero_std_frac": 0.5194444537162781,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9492631077766418,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05904947891831398,
"signal/advantage_abs_mean": 0.7568408727645874,
"signal/advantage_pre_scale_abs_mean": 0.07987073361873627,
"signal/advantage_pre_scale_std": 0.1343359723687172,
"signal/advantage_std": 0.9828636050224304,
"signal/brier_reward/centered_abs_mean": 0.11183222085237503,
"signal/brier_reward/group_std_mean": 0.1446450471878052,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18134056627750397,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011183222196996212,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023035935312509536,
"signal/confidence_uniqueness_reward/group_std_mean": 0.035484759509563445,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03718518950045109,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023035936523228885,
"signal/format_reward/centered_abs_mean": 0.009879557276144623,
"signal/format_reward/group_std_mean": 0.01944063398987055,
"signal/format_reward/group_zero_std_frac": 0.919444453716278,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07821155041456222,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004939778638072312,
"signal/frontier_coverage_0/centered_abs_mean": 0.14692335128784179,
"signal/frontier_coverage_0/group_std_mean": 0.1917490392923355,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.033961694315075876,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002101003797724843,
"signal/frontier_coverage_1/centered_abs_mean": 0.14692335128784179,
"signal/frontier_coverage_1/group_std_mean": 0.1917490392923355,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.033961694315075876,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002101003797724843,
"signal/frontier_coverage_10/centered_abs_mean": 0.05811228826642036,
"signal/frontier_coverage_10/group_std_mean": 0.07442953586578369,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013504561595618724,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000831005722284317,
"signal/frontier_coverage_15/centered_abs_mean": 0.07867266237735748,
"signal/frontier_coverage_15/group_std_mean": 0.09794508963823319,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0183884521946311,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011250190436840057,
"signal/frontier_coverage_20/centered_abs_mean": 0.11196665018796921,
"signal/frontier_coverage_20/group_std_mean": 0.14016874134540558,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02614797055721283,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016011230880394578,
"signal/frontier_coverage_25/centered_abs_mean": 0.15231850743293762,
"signal/frontier_coverage_25/group_std_mean": 0.19153027832508088,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03552020974457264,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021781546529382467,
"signal/frontier_coverage_5/centered_abs_mean": 0.14627367556095122,
"signal/frontier_coverage_5/group_std_mean": 0.19091903269290925,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.033811989799141885,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002091713552363217,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3317020297050476,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39510130882263184,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5416593670845031,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03317020274698734,
"step": 480
},
{
"calibration/aurc": 0.18564510118578065,
"calibration/batch_distribution_entropy": 0.9486334244046699,
"calibration/buffer_distribution_entropy": 0.9805598930051833,
"calibration/confidence_entropy": 0.49161663712729864,
"calibration/coverage@0%": 0.006831564361775608,
"calibration/coverage@1%": 0.006831564361775608,
"calibration/coverage@10%": 0.2141053832594134,
"calibration/coverage@15%": 0.3798645940429059,
"calibration/coverage@20%": 0.6337962401133928,
"calibration/coverage@25%": 0.7658161318860899,
"calibration/coverage@30%": 0.9063885631317362,
"calibration/coverage@5%": 0.1823190643617756,
"calibration/ece": 0.1538997399439367,
"calibration/mean_confidence": 0.6057656998083434,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008246527777777768,
"completions/max_length": 3902.6,
"completions/max_terminated_length": 3902.6,
"completions/mean_length": 812.4036499023438,
"completions/mean_terminated_length": 819.348095703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 139.4,
"epoch": 1.1655979300258748,
"grad_norm": 0.0023592854849994183,
"learning_rate": 3.3353365384615388e-06,
"loss": -0.0169,
"num_tokens": 1253001261.0,
"reward": 1.0053442597389222,
"reward_std": 0.12032625824213028,
"rewards/accuracy_reward": 0.7107639074325561,
"rewards/brier_reward": 0.810713255405426,
"rewards/confidence_uniqueness_reward": 0.9426342844963074,
"rewards/format_reward": 0.9916666626930237,
"rewards/frontier_coverage_0": 0.010230178479105234,
"rewards/frontier_coverage_1": 0.010230178479105234,
"rewards/frontier_coverage_10": 0.0376857940107584,
"rewards/frontier_coverage_15": 0.09146715253591538,
"rewards/frontier_coverage_20": 0.15871667861938477,
"rewards/frontier_coverage_25": 0.23904962837696075,
"rewards/frontier_coverage_5": 0.010379027342423797,
"rewards/frontier_entropy_batch_reward": -0.29181754887104033,
"signal/accuracy_reward/centered_abs_mean": 0.13616536259651185,
"signal/accuracy_reward/group_std_mean": 0.18218682706356049,
"signal/accuracy_reward/group_zero_std_frac": 0.4750000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9551702618598938,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06808268129825593,
"signal/advantage_abs_mean": 0.7498261570930481,
"signal/advantage_pre_scale_abs_mean": 0.08922160565853118,
"signal/advantage_pre_scale_std": 0.14570856988430023,
"signal/advantage_std": 0.983072292804718,
"signal/brier_reward/centered_abs_mean": 0.12257575690746307,
"signal/brier_reward/group_std_mean": 0.1598212093114853,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1727291464805603,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012257575429975987,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02620217837393284,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04137233719229698,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03676874563097954,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026202178793027996,
"signal/format_reward/centered_abs_mean": 0.014735243190079928,
"signal/format_reward/group_std_mean": 0.027417659759521484,
"signal/format_reward/group_zero_std_frac": 0.8916666746139527,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10229784548282624,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007367621595039964,
"signal/frontier_coverage_0/centered_abs_mean": 0.16911786496639253,
"signal/frontier_coverage_0/group_std_mean": 0.2203467756509781,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03399922624230385,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024183853529393673,
"signal/frontier_coverage_1/centered_abs_mean": 0.16911786496639253,
"signal/frontier_coverage_1/group_std_mean": 0.2203467756509781,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03399922624230385,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024183853529393673,
"signal/frontier_coverage_10/centered_abs_mean": 0.06196560263633728,
"signal/frontier_coverage_10/group_std_mean": 0.08008685559034348,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012463575229048729,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000886108132544905,
"signal/frontier_coverage_15/centered_abs_mean": 0.07783405929803848,
"signal/frontier_coverage_15/group_std_mean": 0.09741163402795791,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015766285918653013,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011130270315334201,
"signal/frontier_coverage_20/centered_abs_mean": 0.11139850169420243,
"signal/frontier_coverage_20/group_std_mean": 0.14017115235328675,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022583086416125296,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015929985558614135,
"signal/frontier_coverage_25/centered_abs_mean": 0.15247215330600739,
"signal/frontier_coverage_25/group_std_mean": 0.192574143409729,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030896326154470445,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002180351689457893,
"signal/frontier_coverage_5/centered_abs_mean": 0.1685707986354828,
"signal/frontier_coverage_5/group_std_mean": 0.21966981887817383,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03388794735074043,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002410562336444855,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32269885540008547,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39061746597290037,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.45797826647758483,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03226988650858402,
"step": 485
},
{
"calibration/aurc": 0.12209578498722051,
"calibration/batch_distribution_entropy": 0.958503429934321,
"calibration/buffer_distribution_entropy": 0.9797430580609839,
"calibration/confidence_entropy": 0.48254682389366027,
"calibration/coverage@0%": 0.027819554748278075,
"calibration/coverage@1%": 0.027819554748278075,
"calibration/coverage@10%": 0.5379635313909457,
"calibration/coverage@15%": 0.6975057368759578,
"calibration/coverage@20%": 0.8211241707650437,
"calibration/coverage@25%": 0.9300641786772388,
"calibration/coverage@30%": 0.984251968503937,
"calibration/coverage@5%": 0.25911881534772097,
"calibration/ece": 0.1608791440963225,
"calibration/mean_confidence": 0.5888582159462711,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010243055555555557,
"completions/max_length": 4010.4,
"completions/max_terminated_length": 4010.4,
"completions/mean_length": 828.8147705078125,
"completions/mean_terminated_length": 837.3738647460938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 152.0,
"epoch": 1.1775977800277497,
"grad_norm": 0.002430199645459652,
"learning_rate": 3.3052884615384617e-06,
"loss": -0.0208,
"num_tokens": 1265643895.0,
"reward": 1.008857047557831,
"reward_std": 0.12614489495754241,
"rewards/accuracy_reward": 0.7142361044883728,
"rewards/brier_reward": 0.8259082913398743,
"rewards/confidence_uniqueness_reward": 0.9401936411857605,
"rewards/format_reward": 0.989756953716278,
"rewards/frontier_coverage_0": 0.02575213145464659,
"rewards/frontier_coverage_1": 0.02575213145464659,
"rewards/frontier_coverage_10": 0.04685047268867493,
"rewards/frontier_coverage_15": 0.10534610897302628,
"rewards/frontier_coverage_20": 0.17943286299705505,
"rewards/frontier_coverage_25": 0.26633910536766053,
"rewards/frontier_coverage_5": 0.025928001292049883,
"rewards/frontier_entropy_batch_reward": -0.2940792411565781,
"signal/accuracy_reward/centered_abs_mean": 0.1511501759290695,
"signal/accuracy_reward/group_std_mean": 0.19605484008789062,
"signal/accuracy_reward/group_zero_std_frac": 0.45,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.1121771931648254,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07557508796453476,
"signal/advantage_abs_mean": 0.7582149028778076,
"signal/advantage_pre_scale_abs_mean": 0.09528509080410004,
"signal/advantage_pre_scale_std": 0.15467472672462462,
"signal/advantage_std": 0.983012342453003,
"signal/brier_reward/centered_abs_mean": 0.12473153322935104,
"signal/brier_reward/group_std_mean": 0.16199083328247071,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18308565616607667,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012473153136670589,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028536709398031233,
"signal/confidence_uniqueness_reward/group_std_mean": 0.045708222687244414,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04193682223558426,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028536709025502203,
"signal/format_reward/centered_abs_mean": 0.016503906436264516,
"signal/format_reward/group_std_mean": 0.031104812026023866,
"signal/format_reward/group_zero_std_frac": 0.8694444417953491,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.12030726373195648,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008251953218132258,
"signal/frontier_coverage_0/centered_abs_mean": 0.17551998496055604,
"signal/frontier_coverage_0/group_std_mean": 0.22594922184944152,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03666192330420017,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025099357357248665,
"signal/frontier_coverage_1/centered_abs_mean": 0.17551998496055604,
"signal/frontier_coverage_1/group_std_mean": 0.22594922184944152,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03666192330420017,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025099357357248665,
"signal/frontier_coverage_10/centered_abs_mean": 0.06443373411893845,
"signal/frontier_coverage_10/group_std_mean": 0.08207622617483139,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013494310528039932,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000921402417588979,
"signal/frontier_coverage_15/centered_abs_mean": 0.08036363422870636,
"signal/frontier_coverage_15/group_std_mean": 0.10059184283018112,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017017839662730693,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011491999262943863,
"signal/frontier_coverage_20/centered_abs_mean": 0.1145074725151062,
"signal/frontier_coverage_20/group_std_mean": 0.14463033080101012,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02431431822478771,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001637456798925996,
"signal/frontier_coverage_25/centered_abs_mean": 0.15715709924697877,
"signal/frontier_coverage_25/group_std_mean": 0.1994520455598831,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03337772414088249,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002247346518561244,
"signal/frontier_coverage_5/centered_abs_mean": 0.1747252196073532,
"signal/frontier_coverage_5/group_std_mean": 0.22497124075889588,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.036491810157895085,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002498570643365383,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3236543297767639,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39115352034568784,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4786272764205933,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03236543461680412,
"step": 490
},
{
"calibration/aurc": 0.15002666997967945,
"calibration/batch_distribution_entropy": 0.9103722554100419,
"calibration/buffer_distribution_entropy": 0.979845427603407,
"calibration/confidence_entropy": 0.4612177545633805,
"calibration/coverage@0%": 0.05277696335234473,
"calibration/coverage@1%": 0.05277696335234473,
"calibration/coverage@10%": 0.45088894750812303,
"calibration/coverage@15%": 0.5895088163308796,
"calibration/coverage@20%": 0.7311479303775578,
"calibration/coverage@25%": 0.8568830252575138,
"calibration/coverage@30%": 0.8994821504162551,
"calibration/coverage@5%": 0.30198401792809815,
"calibration/ece": 0.12433344289083512,
"calibration/mean_confidence": 0.6259431202668907,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014496527777777768,
"completions/max_length": 4034.0,
"completions/max_terminated_length": 4034.0,
"completions/mean_length": 829.9917602539062,
"completions/mean_terminated_length": 842.1129760742188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 151.2,
"epoch": 1.1895976300296247,
"grad_norm": 0.003350967774167657,
"learning_rate": 3.2752403846153846e-06,
"loss": -0.0402,
"num_tokens": 1278284440.0,
"reward": 0.997491466999054,
"reward_std": 0.1228562831878662,
"rewards/accuracy_reward": 0.705555546283722,
"rewards/brier_reward": 0.829783308506012,
"rewards/confidence_uniqueness_reward": 0.932075309753418,
"rewards/format_reward": 0.9855034708976745,
"rewards/frontier_coverage_0": 0.035646550729870795,
"rewards/frontier_coverage_1": 0.035646550729870795,
"rewards/frontier_coverage_10": 0.050268112868070605,
"rewards/frontier_coverage_15": 0.11117468625307084,
"rewards/frontier_coverage_20": 0.18869028091430665,
"rewards/frontier_coverage_25": 0.2782452583312988,
"rewards/frontier_coverage_5": 0.03579212427139282,
"rewards/frontier_entropy_batch_reward": -0.34741051197052003,
"signal/accuracy_reward/centered_abs_mean": 0.12520616501569748,
"signal/accuracy_reward/group_std_mean": 0.17152535617351533,
"signal/accuracy_reward/group_zero_std_frac": 0.49166667461395264,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9340617418289184,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06260308250784874,
"signal/advantage_abs_mean": 0.7470793724060059,
"signal/advantage_pre_scale_abs_mean": 0.08969731330871582,
"signal/advantage_pre_scale_std": 0.15365103781223297,
"signal/advantage_std": 0.9829949975013733,
"signal/brier_reward/centered_abs_mean": 0.11885639429092407,
"signal/brier_reward/group_std_mean": 0.15579778254032134,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17715712189674376,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01188563983887434,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.036046646907925604,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05560135096311569,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05372623428702354,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036046647001057863,
"signal/format_reward/centered_abs_mean": 0.02235785610973835,
"signal/format_reward/group_std_mean": 0.03920154646039009,
"signal/format_reward/group_zero_std_frac": 0.8472222089767456,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.16617890894412995,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011178928054869175,
"signal/frontier_coverage_0/centered_abs_mean": 0.14919237792491913,
"signal/frontier_coverage_0/group_std_mean": 0.19633124768733978,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.031800294667482375,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00213345093652606,
"signal/frontier_coverage_1/centered_abs_mean": 0.14919237792491913,
"signal/frontier_coverage_1/group_std_mean": 0.19633124768733978,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.031800294667482375,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00213345093652606,
"signal/frontier_coverage_10/centered_abs_mean": 0.0596095934510231,
"signal/frontier_coverage_10/group_std_mean": 0.07624187171459199,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012707811035215854,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008524171658791602,
"signal/frontier_coverage_15/centered_abs_mean": 0.08215909749269486,
"signal/frontier_coverage_15/group_std_mean": 0.10303394496440887,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01755863316357136,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011748750694096088,
"signal/frontier_coverage_20/centered_abs_mean": 0.11732317209243774,
"signal/frontier_coverage_20/group_std_mean": 0.1485839903354645,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02507934905588627,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016777212964370846,
"signal/frontier_coverage_25/centered_abs_mean": 0.1591496855020523,
"signal/frontier_coverage_25/group_std_mean": 0.20272190868854523,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03401615396142006,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002275840612128377,
"signal/frontier_coverage_5/centered_abs_mean": 0.14844318926334382,
"signal/frontier_coverage_5/group_std_mean": 0.1953786164522171,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.031640862300992015,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002122737606987357,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34229235649108886,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4075429916381836,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5113430559635163,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03422923907637596,
"step": 495
},
{
"calibration/aurc": 0.1747839233157916,
"calibration/batch_distribution_entropy": 0.9480725878663744,
"calibration/buffer_distribution_entropy": 0.9787909719533993,
"calibration/confidence_entropy": 0.48127999860544374,
"calibration/coverage@0%": 0.14181121071084904,
"calibration/coverage@1%": 0.1857661207341445,
"calibration/coverage@10%": 0.40346547235726893,
"calibration/coverage@15%": 0.45549036350187266,
"calibration/coverage@20%": 0.6920285357135578,
"calibration/coverage@25%": 0.7202605792747888,
"calibration/coverage@30%": 0.7576995831311144,
"calibration/coverage@5%": 0.3586746769897794,
"calibration/ece": 0.18344351358857308,
"calibration/mean_confidence": 0.580451451919918,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.025434027777777767,
"completions/max_length": 4065.8,
"completions/max_terminated_length": 4065.8,
"completions/mean_length": 929.4675415039062,
"completions/mean_terminated_length": 953.9163452148438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 185.4,
"epoch": 1.2015974800314997,
"grad_norm": 0.00263255275785923,
"learning_rate": 3.245192307692308e-06,
"loss": -0.0572,
"num_tokens": 1292045410.0,
"reward": 0.9915547251701355,
"reward_std": 0.12933797985315323,
"rewards/accuracy_reward": 0.7018229246139527,
"rewards/brier_reward": 0.8028098583221436,
"rewards/confidence_uniqueness_reward": 0.9266672849655151,
"rewards/format_reward": 0.9745659708976746,
"rewards/frontier_coverage_0": 0.02632404714822769,
"rewards/frontier_coverage_1": 0.02632404714822769,
"rewards/frontier_coverage_10": 0.04794232621788978,
"rewards/frontier_coverage_15": 0.09831590056419373,
"rewards/frontier_coverage_20": 0.1676923632621765,
"rewards/frontier_coverage_25": 0.2507633984088898,
"rewards/frontier_coverage_5": 0.026532990764826535,
"rewards/frontier_entropy_batch_reward": -0.28795150518417356,
"signal/accuracy_reward/centered_abs_mean": 0.12767469733953477,
"signal/accuracy_reward/group_std_mean": 0.16988992393016816,
"signal/accuracy_reward/group_zero_std_frac": 0.5111111223697662,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.93791184425354,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06383734866976738,
"signal/advantage_abs_mean": 0.7479843735694885,
"signal/advantage_pre_scale_abs_mean": 0.09546350091695785,
"signal/advantage_pre_scale_std": 0.16411724388599397,
"signal/advantage_std": 0.9830235481262207,
"signal/brier_reward/centered_abs_mean": 0.13326035737991332,
"signal/brier_reward/group_std_mean": 0.1708984136581421,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19498080313205718,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013326035998761653,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04590501487255096,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0698548398911953,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06717531010508537,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045905016828328375,
"signal/format_reward/centered_abs_mean": 0.03612738735973835,
"signal/format_reward/group_std_mean": 0.058421958982944486,
"signal/format_reward/group_zero_std_frac": 0.7861111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.26427164673805237,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.018063693679869174,
"signal/frontier_coverage_0/centered_abs_mean": 0.18238037824630737,
"signal/frontier_coverage_0/group_std_mean": 0.23497777283191681,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03820807188749313,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002608039416372776,
"signal/frontier_coverage_1/centered_abs_mean": 0.18238037824630737,
"signal/frontier_coverage_1/group_std_mean": 0.23497777283191681,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03820807188749313,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002608039416372776,
"signal/frontier_coverage_10/centered_abs_mean": 0.06780672222375869,
"signal/frontier_coverage_10/group_std_mean": 0.08613030165433884,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01419361848384142,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009696360910311341,
"signal/frontier_coverage_15/centered_abs_mean": 0.07367192506790161,
"signal/frontier_coverage_15/group_std_mean": 0.09180823713541031,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015434963069856167,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010535084875300527,
"signal/frontier_coverage_20/centered_abs_mean": 0.1000160589814186,
"signal/frontier_coverage_20/group_std_mean": 0.12563731968402864,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020969900116324425,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001430229633115232,
"signal/frontier_coverage_25/centered_abs_mean": 0.1348770409822464,
"signal/frontier_coverage_25/group_std_mean": 0.17066603899002075,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028289969265460967,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019287416245788336,
"signal/frontier_coverage_5/centered_abs_mean": 0.18139650523662568,
"signal/frontier_coverage_5/group_std_mean": 0.23375505208969116,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03800202459096909,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00259396992623806,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3278991162776947,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3979050636291504,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.479867422580719,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032789912074804306,
"step": 500
},
{
"epoch": 1.2015974800314997,
"eval_calibration/aurc": 0.11063293442041942,
"eval_calibration/batch_distribution_entropy": 0.9226941120344988,
"eval_calibration/buffer_distribution_entropy": 0.9794678494682892,
"eval_calibration/confidence_entropy": 0.48473487947917687,
"eval_calibration/coverage@0%": 0.2981406810035842,
"eval_calibration/coverage@1%": 0.2981406810035842,
"eval_calibration/coverage@10%": 0.6006496415770609,
"eval_calibration/coverage@15%": 0.783826164874552,
"eval_calibration/coverage@20%": 0.8810035842293907,
"eval_calibration/coverage@25%": 0.9301075268817205,
"eval_calibration/coverage@30%": 0.989247311827957,
"eval_calibration/coverage@5%": 0.4925739247311827,
"eval_calibration/ece": 0.24244848342293904,
"eval_calibration/mean_confidence": 0.5366845362903225,
"eval_completions/clipped_ratio": 0.02777777777777779,
"eval_completions/max_length": 3848.8333333333335,
"eval_completions/max_terminated_length": 3848.8333333333335,
"eval_completions/mean_length": 937.8027954101562,
"eval_completions/mean_terminated_length": 964.6242472330729,
"eval_completions/min_length": 44.0,
"eval_completions/min_terminated_length": 174.5,
"eval_loss": 0.0,
"eval_num_tokens": 1292045410.0,
"eval_reward": 0.8992621103922526,
"eval_reward_std": 0.27247366060813266,
"eval_rewards/accuracy_reward": 0.6744791567325592,
"eval_rewards/brier_reward": 0.7963380813598633,
"eval_rewards/confidence_uniqueness_reward": 0.8638127446174622,
"eval_rewards/format_reward": 0.9678819477558136,
"eval_rewards/frontier_coverage_0": 0.03403743077069521,
"eval_rewards/frontier_coverage_1": 0.03403743077069521,
"eval_rewards/frontier_coverage_10": 0.04340067381660143,
"eval_rewards/frontier_coverage_15": 0.08919741213321686,
"eval_rewards/frontier_coverage_20": 0.15402339398860931,
"eval_rewards/frontier_coverage_25": 0.23039834946393967,
"eval_rewards/frontier_coverage_5": 0.0341131171832482,
"eval_rewards/frontier_entropy_batch_reward": -0.9678819477558136,
"eval_runtime": 226.0063,
"eval_samples_per_second": 4.425,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4259440153837204,
"eval_signal/accuracy_reward/group_std_mean": 0.46788185834884644,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7978424926598867,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2129720076918602,
"eval_signal/advantage_abs_mean": 0.8432898223400116,
"eval_signal/advantage_pre_scale_abs_mean": 0.2298309033115705,
"eval_signal/advantage_pre_scale_std": 0.2716887692610423,
"eval_signal/advantage_std": 0.9864533146222433,
"eval_signal/brier_reward/centered_abs_mean": 0.1994489332040151,
"eval_signal/brier_reward/group_std_mean": 0.26833559075991315,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07479969660441081,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019944893817106884,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07945458094278972,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.14848080774148306,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029809714915851753,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007945458016668757,
"eval_signal/format_reward/centered_abs_mean": 0.060601128886143364,
"eval_signal/format_reward/group_std_mean": 0.1444742592672507,
"eval_signal/format_reward/group_zero_std_frac": 0.3055555621782939,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.1130801538626353,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.030300564443071682,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.2900494833787282,
"eval_signal/frontier_coverage_0/group_std_mean": 0.402191494901975,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01558228504533569,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004147707639882962,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.2900494833787282,
"eval_signal/frontier_coverage_1/group_std_mean": 0.402191494901975,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01558228504533569,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004147707639882962,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.09187478696306546,
"eval_signal/frontier_coverage_10/group_std_mean": 0.12893202776710191,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004941360326483846,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001313809499454995,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.10598516836762428,
"eval_signal/frontier_coverage_15/group_std_mean": 0.13580323879917464,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005688676067317526,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015155878500081599,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.1837949976325035,
"eval_signal/frontier_coverage_20/group_std_mean": 0.22733782976865768,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.009865260062118372,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026282683635751405,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.27196736137072247,
"eval_signal/frontier_coverage_25/group_std_mean": 0.3316345016161601,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.014592031327386698,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003889133183596035,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2886228909095128,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4004148344198863,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.015506657616545757,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004127307174106439,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.060601128886143364,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.1444742592672507,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3055555621782939,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022616030648350716,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.006060113121444981,
"eval_steps_per_second": 0.027,
"step": 500
},
{
"epoch": 1.2015974800314997,
"step": 500,
"train_probe_calibration/aurc": 0.10082706938955065,
"train_probe_calibration/batch_distribution_entropy": 0.9239375688015242,
"train_probe_calibration/buffer_distribution_entropy": 0.9797398341398745,
"train_probe_calibration/confidence_entropy": 0.5040916248291127,
"train_probe_calibration/coverage@0%": 0.3246373285131628,
"train_probe_calibration/coverage@1%": 0.3246373285131628,
"train_probe_calibration/coverage@10%": 0.5615556637004079,
"train_probe_calibration/coverage@15%": 0.7940188172043011,
"train_probe_calibration/coverage@20%": 0.9206989247311829,
"train_probe_calibration/coverage@25%": 0.9627016129032259,
"train_probe_calibration/coverage@30%": 0.9946236559139785,
"train_probe_calibration/coverage@5%": 0.43221635150166854,
"train_probe_calibration/ece": 0.23516302836484984,
"train_probe_calibration/mean_confidence": 0.5604287286800148,
"train_probe_completions/clipped_ratio": 0.025868055555555564,
"train_probe_completions/max_length": 3734.5,
"train_probe_completions/max_terminated_length": 3734.5,
"train_probe_completions/mean_length": 925.494639078776,
"train_probe_completions/mean_terminated_length": 950.2415771484375,
"train_probe_completions/min_length": 0.0,
"train_probe_completions/min_terminated_length": 142.5,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 1292045410.0,
"train_probe_reward": 0.9318026403586069,
"train_probe_reward_std": 0.2528877506653468,
"train_probe_rewards/accuracy_reward": 0.7326388855775198,
"train_probe_rewards/brier_reward": 0.8016867140928904,
"train_probe_rewards/confidence_uniqueness_reward": 0.875600536664327,
"train_probe_rewards/format_reward": 0.9739583333333334,
"train_probe_rewards/frontier_coverage_0": -0.0028847836268444857,
"train_probe_rewards/frontier_coverage_1": -0.0028847836268444857,
"train_probe_rewards/frontier_coverage_10": 0.038503200436631836,
"train_probe_rewards/frontier_coverage_15": 0.10058233390251796,
"train_probe_rewards/frontier_coverage_20": 0.1763912762204806,
"train_probe_rewards/frontier_coverage_25": 0.2643541420499484,
"train_probe_rewards/frontier_coverage_5": -0.0026549692265689373,
"train_probe_rewards/frontier_entropy_batch_reward": -0.9739583333333334,
"train_probe_runtime": 213.9075,
"train_probe_samples_per_second": 4.675,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.3841145783662796,
"train_probe_signal/accuracy_reward/group_std_mean": 0.44358054796854657,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.785220742225647,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.1920572891831398,
"train_probe_signal/advantage_abs_mean": 0.806507021188736,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.20440822591384253,
"train_probe_signal/advantage_pre_scale_std": 0.25263623893260956,
"train_probe_signal/advantage_std": 0.9864190816879272,
"train_probe_signal/brier_reward/centered_abs_mean": 0.1916190137465795,
"train_probe_signal/brier_reward/group_std_mean": 0.25596588601668674,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.078202273696661,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.019161902368068695,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07009036901096503,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.1314548502365748,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028176602286597092,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007009036994228761,
"train_probe_signal/format_reward/centered_abs_mean": 0.04937065920482079,
"train_probe_signal/format_reward/group_std_mean": 0.12234597342709701,
"train_probe_signal/format_reward/group_zero_std_frac": 0.3888888979951541,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.09680349566042423,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.024685329602410395,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.2796660164992015,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.40178043643633526,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01646174117922783,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003999224087844293,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2796660164992015,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.40178043643633526,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01646174117922783,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003999224087844293,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.09035198017954826,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.12981040154894194,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005333998861412208,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012920333344178896,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.1053730125228564,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.13237932324409485,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006203630783905585,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015068341551038127,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.18111580361922583,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.21775591125090918,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.010658677046497663,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025899558483312526,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.2640492667754491,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.31466857592264813,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015524488873779774,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003775904420763254,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.27851397295792896,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.40030378103256226,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.016393487496922415,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003982749573575954,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.04937065920482079,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.12234597342709701,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3888888979951541,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019360700622200966,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.004937066075702508,
"train_probe_steps_per_second": 0.028
},
{
"calibration/aurc": 0.20048948593563543,
"calibration/batch_distribution_entropy": 0.9468109675307013,
"calibration/buffer_distribution_entropy": 0.9805443145825734,
"calibration/confidence_entropy": 0.48701692744443614,
"calibration/coverage@0%": 0.02852633425186054,
"calibration/coverage@1%": 0.09942580515133145,
"calibration/coverage@10%": 0.2110494824000087,
"calibration/coverage@15%": 0.3519585394662962,
"calibration/coverage@20%": 0.48718463966566417,
"calibration/coverage@25%": 0.7482349120999991,
"calibration/coverage@30%": 0.8652923686374748,
"calibration/coverage@5%": 0.2004840062095325,
"calibration/ece": 0.15429015886639375,
"calibration/mean_confidence": 0.5836335671726103,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.022395833333333327,
"completions/max_length": 4073.4,
"completions/max_terminated_length": 4073.4,
"completions/mean_length": 924.2508056640625,
"completions/mean_terminated_length": 946.0132934570313,
"completions/min_length": 0.0,
"completions/min_terminated_length": 138.4,
"epoch": 1.2135973300333747,
"grad_norm": 0.0023646834306418896,
"learning_rate": 3.215144230769231e-06,
"loss": -0.0669,
"num_tokens": 1305812971.0,
"reward": 0.9820307612419128,
"reward_std": 0.13412527740001678,
"rewards/accuracy_reward": 0.6799479246139526,
"rewards/brier_reward": 0.8102263927459716,
"rewards/confidence_uniqueness_reward": 0.9278807520866394,
"rewards/format_reward": 0.97734375,
"rewards/frontier_coverage_0": 0.03650612365454435,
"rewards/frontier_coverage_1": 0.03650612365454435,
"rewards/frontier_coverage_10": 0.04657657854259014,
"rewards/frontier_coverage_15": 0.09520111978054047,
"rewards/frontier_coverage_20": 0.16185519099235535,
"rewards/frontier_coverage_25": 0.23959860801696778,
"rewards/frontier_coverage_5": 0.03667759094387293,
"rewards/frontier_entropy_batch_reward": -0.2976256161928177,
"signal/accuracy_reward/centered_abs_mean": 0.13191731721162797,
"signal/accuracy_reward/group_std_mean": 0.17187346816062926,
"signal/accuracy_reward/group_zero_std_frac": 0.5166666805744171,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9841681718826294,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06595865860581399,
"signal/advantage_abs_mean": 0.7553405404090882,
"signal/advantage_pre_scale_abs_mean": 0.09927740842103958,
"signal/advantage_pre_scale_std": 0.17140043079853057,
"signal/advantage_std": 0.9830001354217529,
"signal/brier_reward/centered_abs_mean": 0.12971344888210296,
"signal/brier_reward/group_std_mean": 0.16749563217163085,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19204829931259154,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012971345335245132,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.046678535640239716,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0732833631336689,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06865589916706086,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004667853750288486,
"signal/format_reward/centered_abs_mean": 0.03662651926279068,
"signal/format_reward/group_std_mean": 0.061449573189020154,
"signal/format_reward/group_zero_std_frac": 0.7722222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.26796387135982513,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01831325963139534,
"signal/frontier_coverage_0/centered_abs_mean": 0.16079207360744477,
"signal/frontier_coverage_0/group_std_mean": 0.2081853598356247,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.034043775871396065,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022993266582489015,
"signal/frontier_coverage_1/centered_abs_mean": 0.16079207360744477,
"signal/frontier_coverage_1/group_std_mean": 0.2081853598356247,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.034043775871396065,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022993266582489015,
"signal/frontier_coverage_10/centered_abs_mean": 0.062105555832386014,
"signal/frontier_coverage_10/group_std_mean": 0.07952445596456528,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013151372782886028,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008881094516254961,
"signal/frontier_coverage_15/centered_abs_mean": 0.07533517330884934,
"signal/frontier_coverage_15/group_std_mean": 0.09395917057991028,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016060548834502697,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001077292929403484,
"signal/frontier_coverage_20/centered_abs_mean": 0.10723637640476227,
"signal/frontier_coverage_20/group_std_mean": 0.1344798132777214,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022898206114768983,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001533480198122561,
"signal/frontier_coverage_25/centered_abs_mean": 0.14641993939876558,
"signal/frontier_coverage_25/group_std_mean": 0.1840170592069626,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03127242475748062,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020938052562996744,
"signal/frontier_coverage_5/centered_abs_mean": 0.16002678871154785,
"signal/frontier_coverage_5/group_std_mean": 0.20722314417362214,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03388084582984448,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022883829893544315,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3222051739692688,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3910215377807617,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4791221499443054,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03222051709890365,
"step": 505
},
{
"calibration/aurc": 0.11524218938071602,
"calibration/batch_distribution_entropy": 0.9543161540977898,
"calibration/buffer_distribution_entropy": 0.9807149695576822,
"calibration/confidence_entropy": 0.48301446353835925,
"calibration/coverage@0%": 0.07332392905046345,
"calibration/coverage@1%": 0.15872933445586884,
"calibration/coverage@10%": 0.5471898087890095,
"calibration/coverage@15%": 0.6684821027885148,
"calibration/coverage@20%": 0.8312767838615454,
"calibration/coverage@25%": 0.9212252117614753,
"calibration/coverage@30%": 0.9618798955613578,
"calibration/coverage@5%": 0.32275472534539945,
"calibration/ece": 0.14129609700485238,
"calibration/mean_confidence": 0.6127158754894365,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.027083333333333348,
"completions/max_length": 4070.2,
"completions/max_terminated_length": 4070.2,
"completions/mean_length": 886.969970703125,
"completions/mean_terminated_length": 911.6876220703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 136.0,
"epoch": 1.2255971800352496,
"grad_norm": 0.0024071610532701015,
"learning_rate": 3.185096153846154e-06,
"loss": -0.0754,
"num_tokens": 1319165553.0,
"reward": 0.9764072179794312,
"reward_std": 0.14252603948116302,
"rewards/accuracy_reward": 0.6737847089767456,
"rewards/brier_reward": 0.811568808555603,
"rewards/confidence_uniqueness_reward": 0.9235637068748475,
"rewards/format_reward": 0.9729166626930237,
"rewards/frontier_coverage_0": 0.03956596069037914,
"rewards/frontier_coverage_1": 0.03956596069037914,
"rewards/frontier_coverage_10": 0.045381075143814086,
"rewards/frontier_coverage_15": 0.09828296452760696,
"rewards/frontier_coverage_20": 0.16703141033649443,
"rewards/frontier_coverage_25": 0.24602045118808746,
"rewards/frontier_coverage_5": 0.03960155472159386,
"rewards/frontier_entropy_batch_reward": -0.301156747341156,
"signal/accuracy_reward/centered_abs_mean": 0.1375868022441864,
"signal/accuracy_reward/group_std_mean": 0.18322362005710602,
"signal/accuracy_reward/group_zero_std_frac": 0.4750000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9304415464401246,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0687934011220932,
"signal/advantage_abs_mean": 0.7482947945594788,
"signal/advantage_pre_scale_abs_mean": 0.10517836511135101,
"signal/advantage_pre_scale_std": 0.17666726410388947,
"signal/advantage_std": 0.9831276297569275,
"signal/brier_reward/centered_abs_mean": 0.13354314863681793,
"signal/brier_reward/group_std_mean": 0.17319420278072356,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18088602125644684,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013354315236210824,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04955080598592758,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07686270922422409,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06728862300515175,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004955080850049853,
"signal/format_reward/centered_abs_mean": 0.03995225727558136,
"signal/format_reward/group_std_mean": 0.06567521169781684,
"signal/format_reward/group_zero_std_frac": 0.7583333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2711753636598587,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01997612863779068,
"signal/frontier_coverage_0/centered_abs_mean": 0.15956219732761384,
"signal/frontier_coverage_0/group_std_mean": 0.2080337166786194,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03082931824028492,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002281739329919219,
"signal/frontier_coverage_1/centered_abs_mean": 0.15956219732761384,
"signal/frontier_coverage_1/group_std_mean": 0.2080337166786194,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03082931824028492,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002281739329919219,
"signal/frontier_coverage_10/centered_abs_mean": 0.062475910782814024,
"signal/frontier_coverage_10/group_std_mean": 0.08098638206720352,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012097678333520889,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008934055338613689,
"signal/frontier_coverage_15/centered_abs_mean": 0.0785724624991417,
"signal/frontier_coverage_15/group_std_mean": 0.09894902110099793,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015267135202884674,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011235862039029598,
"signal/frontier_coverage_20/centered_abs_mean": 0.11322257518768311,
"signal/frontier_coverage_20/group_std_mean": 0.14310256838798524,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021999914199113846,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001619082852266729,
"signal/frontier_coverage_25/centered_abs_mean": 0.15498829185962676,
"signal/frontier_coverage_25/group_std_mean": 0.19647544622421265,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030105485394597054,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022163325920701027,
"signal/frontier_coverage_5/centered_abs_mean": 0.15889337360858918,
"signal/frontier_coverage_5/group_std_mean": 0.20719724297523498,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030700084939599036,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022721752058714626,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3251783013343811,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39037303924560546,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.44218236207962036,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03251783214509487,
"step": 510
},
{
"calibration/aurc": 0.14329086717155515,
"calibration/batch_distribution_entropy": 0.9839842077691205,
"calibration/buffer_distribution_entropy": 0.9813149779353205,
"calibration/confidence_entropy": 0.4870171331461034,
"calibration/coverage@0%": 0.14524750649642462,
"calibration/coverage@1%": 0.20226281740410096,
"calibration/coverage@10%": 0.5290204554078171,
"calibration/coverage@15%": 0.6161437388665884,
"calibration/coverage@20%": 0.6710200053905481,
"calibration/coverage@25%": 0.732897098273093,
"calibration/coverage@30%": 0.9145777773032158,
"calibration/coverage@5%": 0.3410370616474247,
"calibration/ece": 0.18543605582400366,
"calibration/mean_confidence": 0.5287664657151198,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.038107638888888885,
"completions/max_length": 4064.6,
"completions/max_terminated_length": 4064.6,
"completions/mean_length": 964.12431640625,
"completions/mean_terminated_length": 1002.4943969726562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 136.0,
"epoch": 1.2375970300371246,
"grad_norm": 0.0022482494823634624,
"learning_rate": 3.1550480769230772e-06,
"loss": -0.1034,
"num_tokens": 1333396905.0,
"reward": 0.9720722794532776,
"reward_std": 0.15557878315448762,
"rewards/accuracy_reward": 0.6752604126930237,
"rewards/brier_reward": 0.7917217254638672,
"rewards/confidence_uniqueness_reward": 0.9149444341659546,
"rewards/format_reward": 0.9618923664093018,
"rewards/frontier_coverage_0": 0.03308947309851647,
"rewards/frontier_coverage_1": 0.03308947309851647,
"rewards/frontier_coverage_10": 0.047409339994192126,
"rewards/frontier_coverage_15": 0.09314282685518264,
"rewards/frontier_coverage_20": 0.15709015727043152,
"rewards/frontier_coverage_25": 0.2324953556060791,
"rewards/frontier_coverage_5": 0.03309837207198143,
"rewards/frontier_entropy_batch_reward": -0.2617133766412735,
"signal/accuracy_reward/centered_abs_mean": 0.15260959267616273,
"signal/accuracy_reward/group_std_mean": 0.19807665944099426,
"signal/accuracy_reward/group_zero_std_frac": 0.44722222685813906,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9974145650863647,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07630479633808136,
"signal/advantage_abs_mean": 0.7443260788917542,
"signal/advantage_pre_scale_abs_mean": 0.11457104533910752,
"signal/advantage_pre_scale_std": 0.1953383594751358,
"signal/advantage_std": 0.9831731081008911,
"signal/brier_reward/centered_abs_mean": 0.1467885345220566,
"signal/brier_reward/group_std_mean": 0.18838548064231872,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19225478768348694,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014678853936493397,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06417426541447639,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09944256693124771,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08404597043991088,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006417426653206349,
"signal/format_reward/centered_abs_mean": 0.05706922709941864,
"signal/format_reward/group_std_mean": 0.09146839380264282,
"signal/format_reward/group_zero_std_frac": 0.675000011920929,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.37363603711128235,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.02853461354970932,
"signal/frontier_coverage_0/centered_abs_mean": 0.1946762889623642,
"signal/frontier_coverage_0/group_std_mean": 0.25131337344646454,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036392098665237425,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002783870976418257,
"signal/frontier_coverage_1/centered_abs_mean": 0.1946762889623642,
"signal/frontier_coverage_1/group_std_mean": 0.25131337344646454,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036392098665237425,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002783870976418257,
"signal/frontier_coverage_10/centered_abs_mean": 0.07278724461793899,
"signal/frontier_coverage_10/group_std_mean": 0.09403313398361206,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013623019121587277,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010408576345071197,
"signal/frontier_coverage_15/centered_abs_mean": 0.07076575458049775,
"signal/frontier_coverage_15/group_std_mean": 0.08887670189142227,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013323003239929677,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010119502898305655,
"signal/frontier_coverage_20/centered_abs_mean": 0.09599952101707458,
"signal/frontier_coverage_20/group_std_mean": 0.12031063288450242,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01808130946010351,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001372793153859675,
"signal/frontier_coverage_25/centered_abs_mean": 0.13066532760858535,
"signal/frontier_coverage_25/group_std_mean": 0.1640017569065094,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0245991725474596,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018685141811147332,
"signal/frontier_coverage_5/centered_abs_mean": 0.19397080540657044,
"signal/frontier_coverage_5/group_std_mean": 0.25044049620628356,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0362599141895771,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027737823780626058,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31684728264808654,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3862759530544281,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4152994632720947,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03168472871184349,
"step": 515
},
{
"calibration/aurc": 0.10522250668379389,
"calibration/batch_distribution_entropy": 0.9442073604560515,
"calibration/buffer_distribution_entropy": 0.9815735705294294,
"calibration/confidence_entropy": 0.46795378636960266,
"calibration/coverage@0%": 0.21857111712669325,
"calibration/coverage@1%": 0.22694808047747855,
"calibration/coverage@10%": 0.621711658227854,
"calibration/coverage@15%": 0.7427390973094512,
"calibration/coverage@20%": 0.8060069914470027,
"calibration/coverage@25%": 0.8796992448478779,
"calibration/coverage@30%": 0.943609360040151,
"calibration/coverage@5%": 0.3731836924541433,
"calibration/ece": 0.155591407185311,
"calibration/mean_confidence": 0.5833382217229488,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02178819444444442,
"completions/max_length": 4046.4,
"completions/max_terminated_length": 4046.4,
"completions/mean_length": 969.8170288085937,
"completions/mean_terminated_length": 991.4690673828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 134.8,
"epoch": 1.2495968800389996,
"grad_norm": 0.002296426799148321,
"learning_rate": 3.125e-06,
"loss": -0.06,
"num_tokens": 1347645709.0,
"reward": 0.999582850933075,
"reward_std": 0.13867679089307786,
"rewards/accuracy_reward": 0.7163194417953491,
"rewards/brier_reward": 0.8157782912254333,
"rewards/confidence_uniqueness_reward": 0.9268351197242737,
"rewards/format_reward": 0.9780381917953491,
"rewards/frontier_coverage_0": 0.01865054778754711,
"rewards/frontier_coverage_1": 0.01865054778754711,
"rewards/frontier_coverage_10": 0.04594656229019165,
"rewards/frontier_coverage_15": 0.10937698483467102,
"rewards/frontier_coverage_20": 0.18607729077339172,
"rewards/frontier_coverage_25": 0.2728832870721817,
"rewards/frontier_coverage_5": 0.018807118758559227,
"rewards/frontier_entropy_batch_reward": -0.31443960666656495,
"signal/accuracy_reward/centered_abs_mean": 0.13279080092906953,
"signal/accuracy_reward/group_std_mean": 0.17991018891334534,
"signal/accuracy_reward/group_zero_std_frac": 0.4638888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9323319673538208,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06639540046453477,
"signal/advantage_abs_mean": 0.7279812693595886,
"signal/advantage_pre_scale_abs_mean": 0.09768068045377731,
"signal/advantage_pre_scale_std": 0.17483056783676149,
"signal/advantage_std": 0.983068585395813,
"signal/brier_reward/centered_abs_mean": 0.13028870820999144,
"signal/brier_reward/group_std_mean": 0.17139129638671874,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18324156403541564,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013028871826827526,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.048380535840988156,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08181221485137939,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06806915402412414,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0048380537889897825,
"signal/format_reward/centered_abs_mean": 0.03767903596162796,
"signal/format_reward/group_std_mean": 0.06963710114359856,
"signal/format_reward/group_zero_std_frac": 0.7194444417953492,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2640444874763489,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01883951798081398,
"signal/frontier_coverage_0/centered_abs_mean": 0.16820046305656433,
"signal/frontier_coverage_0/group_std_mean": 0.21814047396183014,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03390519693493843,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002405266650021076,
"signal/frontier_coverage_1/centered_abs_mean": 0.16820046305656433,
"signal/frontier_coverage_1/group_std_mean": 0.21814047396183014,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03390519693493843,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002405266650021076,
"signal/frontier_coverage_10/centered_abs_mean": 0.06460350453853607,
"signal/frontier_coverage_10/group_std_mean": 0.08274413645267487,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013064522296190262,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009238301077857614,
"signal/frontier_coverage_15/centered_abs_mean": 0.07587840259075165,
"signal/frontier_coverage_15/group_std_mean": 0.09482774585485458,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015400785207748412,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010850611375644802,
"signal/frontier_coverage_20/centered_abs_mean": 0.10519644320011139,
"signal/frontier_coverage_20/group_std_mean": 0.13207355737686158,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021333112940192224,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015043091727420688,
"signal/frontier_coverage_25/centered_abs_mean": 0.14107316136360168,
"signal/frontier_coverage_25/group_std_mean": 0.17815456092357634,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028566186130046845,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002017346234060824,
"signal/frontier_coverage_5/centered_abs_mean": 0.1673600971698761,
"signal/frontier_coverage_5/group_std_mean": 0.21709263622760772,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03373638391494751,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023932492826133967,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32988876700401304,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3970253348350525,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.46771731972694397,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03298887610435486,
"step": 520
},
{
"calibration/aurc": 0.07573435525317126,
"calibration/batch_distribution_entropy": 0.9798587675052751,
"calibration/buffer_distribution_entropy": 0.9805877942073135,
"calibration/confidence_entropy": 0.48889949932619253,
"calibration/coverage@0%": 0.1964918246394519,
"calibration/coverage@1%": 0.31187742499497545,
"calibration/coverage@10%": 0.727026166554092,
"calibration/coverage@15%": 0.8365429401666103,
"calibration/coverage@20%": 0.9077380952380952,
"calibration/coverage@25%": 0.9482638888888889,
"calibration/coverage@30%": 0.9755208333333334,
"calibration/coverage@5%": 0.5086287200446998,
"calibration/ece": 0.21796827993122228,
"calibration/mean_confidence": 0.554094432213387,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015624999999999977,
"completions/max_length": 4022.0,
"completions/max_terminated_length": 4022.0,
"completions/mean_length": 975.5559936523438,
"completions/mean_terminated_length": 991.1448974609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 128.4,
"epoch": 1.2615967300408744,
"grad_norm": 0.002525532152503729,
"learning_rate": 3.094951923076923e-06,
"loss": -0.0445,
"num_tokens": 1362017970.0,
"reward": 1.0022146463394166,
"reward_std": 0.13520086407661439,
"rewards/accuracy_reward": 0.7090277671813965,
"rewards/brier_reward": 0.8121278762817383,
"rewards/confidence_uniqueness_reward": 0.9360453128814697,
"rewards/format_reward": 0.9843749880790711,
"rewards/frontier_coverage_0": 0.013298888225108385,
"rewards/frontier_coverage_1": 0.013298888225108385,
"rewards/frontier_coverage_10": 0.04437965005636215,
"rewards/frontier_coverage_15": 0.09919513911008834,
"rewards/frontier_coverage_20": 0.16938573122024536,
"rewards/frontier_coverage_25": 0.2509212583303452,
"rewards/frontier_coverage_5": 0.013450206723064184,
"rewards/frontier_entropy_batch_reward": -0.27940293252468107,
"signal/accuracy_reward/centered_abs_mean": 0.14516059160232545,
"signal/accuracy_reward/group_std_mean": 0.1927432417869568,
"signal/accuracy_reward/group_zero_std_frac": 0.44722222685813906,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.031061041355133,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07258029580116272,
"signal/advantage_abs_mean": 0.7332652449607849,
"signal/advantage_pre_scale_abs_mean": 0.09713614881038665,
"signal/advantage_pre_scale_std": 0.1668863743543625,
"signal/advantage_std": 0.9830695152282715,
"signal/brier_reward/centered_abs_mean": 0.12466190755367279,
"signal/brier_reward/group_std_mean": 0.16394980251789093,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.176994127035141,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012466190941631794,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.036994371190667154,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06596145778894424,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05264209508895874,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003699437016621232,
"signal/format_reward/centered_abs_mean": 0.02797309048473835,
"signal/format_reward/group_std_mean": 0.05659161433577538,
"signal/format_reward/group_zero_std_frac": 0.7555555701255798,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.19929880797863006,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013986545242369175,
"signal/frontier_coverage_0/centered_abs_mean": 0.176213401556015,
"signal/frontier_coverage_0/group_std_mean": 0.23016616702079773,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03586722575128078,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025198515504598618,
"signal/frontier_coverage_1/centered_abs_mean": 0.176213401556015,
"signal/frontier_coverage_1/group_std_mean": 0.23016616702079773,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03586722575128078,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025198515504598618,
"signal/frontier_coverage_10/centered_abs_mean": 0.0657036691904068,
"signal/frontier_coverage_10/group_std_mean": 0.08542147278785706,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013377317041158677,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009395624394528568,
"signal/frontier_coverage_15/centered_abs_mean": 0.07424005419015885,
"signal/frontier_coverage_15/group_std_mean": 0.0930503711104393,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015023346804082394,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010616328101605177,
"signal/frontier_coverage_20/centered_abs_mean": 0.10434054583311081,
"signal/frontier_coverage_20/group_std_mean": 0.13108078241348267,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021078139171004297,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014920698245987295,
"signal/frontier_coverage_25/centered_abs_mean": 0.14281499981880189,
"signal/frontier_coverage_25/group_std_mean": 0.17953548729419708,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028848520666360854,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002042254386469722,
"signal/frontier_coverage_5/centered_abs_mean": 0.17538869976997376,
"signal/frontier_coverage_5/group_std_mean": 0.22913878560066223,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035702398791909215,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025080583058297635,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32218562960624697,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3871644794940948,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.45660458207130433,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032218563556671145,
"step": 525
},
{
"calibration/aurc": 0.12962692277572296,
"calibration/batch_distribution_entropy": 0.9677968027317269,
"calibration/buffer_distribution_entropy": 0.9805717030444366,
"calibration/confidence_entropy": 0.48113403159133633,
"calibration/coverage@0%": 0.18798672099690084,
"calibration/coverage@1%": 0.2269086786688585,
"calibration/coverage@10%": 0.6417013312562826,
"calibration/coverage@15%": 0.7091990515665254,
"calibration/coverage@20%": 0.751765976896867,
"calibration/coverage@25%": 0.7722513089005235,
"calibration/coverage@30%": 0.8313000360120778,
"calibration/coverage@5%": 0.44918795254640304,
"calibration/ece": 0.19004887245585186,
"calibration/mean_confidence": 0.5680402399944228,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013628472222222233,
"completions/max_length": 3961.8,
"completions/max_terminated_length": 3961.8,
"completions/mean_length": 875.2192016601563,
"completions/mean_terminated_length": 887.3105590820312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 137.4,
"epoch": 1.2735965800427493,
"grad_norm": 0.0029400011990219355,
"learning_rate": 3.0649038461538464e-06,
"loss": -0.0226,
"num_tokens": 1375179855.0,
"reward": 1.0003417372703551,
"reward_std": 0.13130183517932892,
"rewards/accuracy_reward": 0.703125,
"rewards/brier_reward": 0.8222099423408509,
"rewards/confidence_uniqueness_reward": 0.936951196193695,
"rewards/format_reward": 0.9862847328186035,
"rewards/frontier_coverage_0": 0.03191882474347949,
"rewards/frontier_coverage_1": 0.03191882474347949,
"rewards/frontier_coverage_10": 0.04538390077650547,
"rewards/frontier_coverage_15": 0.10414295643568039,
"rewards/frontier_coverage_20": 0.17619226574897767,
"rewards/frontier_coverage_25": 0.25885328352451326,
"rewards/frontier_coverage_5": 0.03208579635247588,
"rewards/frontier_entropy_batch_reward": -0.3001033067703247,
"signal/accuracy_reward/centered_abs_mean": 0.15182291865348815,
"signal/accuracy_reward/group_std_mean": 0.19853868186473847,
"signal/accuracy_reward/group_zero_std_frac": 0.4388888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0715280532836915,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07591145932674408,
"signal/advantage_abs_mean": 0.7480924487113952,
"signal/advantage_pre_scale_abs_mean": 0.0976143628358841,
"signal/advantage_pre_scale_std": 0.16226148903369902,
"signal/advantage_std": 0.9830726742744446,
"signal/brier_reward/centered_abs_mean": 0.12366195023059845,
"signal/brier_reward/group_std_mean": 0.16041145622730255,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17475055456161498,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012366195768117904,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.033436324819922446,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05639224275946617,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.047277077287435534,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003343632631003857,
"signal/format_reward/centered_abs_mean": 0.02293836809694767,
"signal/format_reward/group_std_mean": 0.04403809979557991,
"signal/format_reward/group_zero_std_frac": 0.8138888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.16185255199670792,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011469184048473835,
"signal/frontier_coverage_0/centered_abs_mean": 0.17727761566638947,
"signal/frontier_coverage_0/group_std_mean": 0.2270788460969925,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03581186383962631,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025350698735564945,
"signal/frontier_coverage_1/centered_abs_mean": 0.17727761566638947,
"signal/frontier_coverage_1/group_std_mean": 0.2270788460969925,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03581186383962631,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025350698735564945,
"signal/frontier_coverage_10/centered_abs_mean": 0.07169679552316666,
"signal/frontier_coverage_10/group_std_mean": 0.09079298973083497,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014486683905124665,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010252641513943672,
"signal/frontier_coverage_15/centered_abs_mean": 0.07800347357988358,
"signal/frontier_coverage_15/group_std_mean": 0.09790896475315095,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015783482789993288,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011154496809467674,
"signal/frontier_coverage_20/centered_abs_mean": 0.10981054455041886,
"signal/frontier_coverage_20/group_std_mean": 0.1392547756433487,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02221333757042885,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015702907461673022,
"signal/frontier_coverage_25/centered_abs_mean": 0.1499340057373047,
"signal/frontier_coverage_25/group_std_mean": 0.19064950346946716,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03032132089138031,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021440562792122363,
"signal/frontier_coverage_5/centered_abs_mean": 0.17626629769802094,
"signal/frontier_coverage_5/group_std_mean": 0.22582550942897797,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03560806550085545,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002520608017221093,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32504919171333313,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3949630320072174,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4604054570198059,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032504919171333316,
"step": 530
},
{
"calibration/aurc": 0.05840357354887128,
"calibration/batch_distribution_entropy": 0.9378896107732835,
"calibration/buffer_distribution_entropy": 0.9803632250109828,
"calibration/confidence_entropy": 0.4649623520355387,
"calibration/coverage@0%": 0.1630159388228418,
"calibration/coverage@1%": 0.26672793130768035,
"calibration/coverage@10%": 0.7872173597822631,
"calibration/coverage@15%": 0.8644275603071581,
"calibration/coverage@20%": 0.9390392566484804,
"calibration/coverage@25%": 0.9847320651498197,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.7084764103084508,
"calibration/ece": 0.1835135729808905,
"calibration/mean_confidence": 0.6055113479510223,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009809027777777767,
"completions/max_length": 3855.2,
"completions/max_terminated_length": 3855.2,
"completions/mean_length": 789.052001953125,
"completions/mean_terminated_length": 797.0057495117187,
"completions/min_length": 0.0,
"completions/min_terminated_length": 133.4,
"epoch": 1.2855964300446243,
"grad_norm": 0.0027481773868203163,
"learning_rate": 3.0348557692307694e-06,
"loss": -0.0222,
"num_tokens": 1387338630.0,
"reward": 1.0056124329566956,
"reward_std": 0.11937729865312577,
"rewards/accuracy_reward": 0.712499988079071,
"rewards/brier_reward": 0.8213678240776062,
"rewards/confidence_uniqueness_reward": 0.9392488479614258,
"rewards/format_reward": 0.9901041626930237,
"rewards/frontier_coverage_0": 0.022314731776714326,
"rewards/frontier_coverage_1": 0.022314731776714326,
"rewards/frontier_coverage_10": 0.04989167377352714,
"rewards/frontier_coverage_15": 0.1045038491487503,
"rewards/frontier_coverage_20": 0.17718027234077455,
"rewards/frontier_coverage_25": 0.2612395048141479,
"rewards/frontier_coverage_5": 0.022799749858677387,
"rewards/frontier_entropy_batch_reward": -0.31192846298217775,
"signal/accuracy_reward/centered_abs_mean": 0.1269965276122093,
"signal/accuracy_reward/group_std_mean": 0.17362678050994873,
"signal/accuracy_reward/group_zero_std_frac": 0.4888889014720917,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9380814790725708,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06349826380610465,
"signal/advantage_abs_mean": 0.7406193017959595,
"signal/advantage_pre_scale_abs_mean": 0.08632948994636536,
"signal/advantage_pre_scale_std": 0.14735422730445863,
"signal/advantage_std": 0.9830026030540466,
"signal/brier_reward/centered_abs_mean": 0.11631175279617309,
"signal/brier_reward/group_std_mean": 0.15423674881458282,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17220796644687653,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011631175130605697,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029228382930159568,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04904755130410195,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0437807485461235,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029228384140878917,
"signal/format_reward/centered_abs_mean": 0.017263454757630824,
"signal/format_reward/group_std_mean": 0.034723594039678576,
"signal/format_reward/group_zero_std_frac": 0.85,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.13021927326917648,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008631727378815412,
"signal/frontier_coverage_0/centered_abs_mean": 0.15657298862934113,
"signal/frontier_coverage_0/group_std_mean": 0.2082358866930008,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0331398393958807,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022389938123524187,
"signal/frontier_coverage_1/centered_abs_mean": 0.15657298862934113,
"signal/frontier_coverage_1/group_std_mean": 0.2082358866930008,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0331398393958807,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022389938123524187,
"signal/frontier_coverage_10/centered_abs_mean": 0.07481402903795242,
"signal/frontier_coverage_10/group_std_mean": 0.09677753150463105,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015874645672738552,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001069840556010604,
"signal/frontier_coverage_15/centered_abs_mean": 0.07884364426136017,
"signal/frontier_coverage_15/group_std_mean": 0.09854675233364105,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016783738508820534,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011274641146883368,
"signal/frontier_coverage_20/centered_abs_mean": 0.11144341826438904,
"signal/frontier_coverage_20/group_std_mean": 0.1409495711326599,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023704275116324426,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001593640772625804,
"signal/frontier_coverage_25/centered_abs_mean": 0.15060859620571138,
"signal/frontier_coverage_25/group_std_mean": 0.1917654901742935,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03201264552772045,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021537028020247816,
"signal/frontier_coverage_5/centered_abs_mean": 0.1554807960987091,
"signal/frontier_coverage_5/group_std_mean": 0.20682709217071532,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03290844485163689,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022233754862099886,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32440600991249086,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3896322250366211,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.48102996349334715,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03244060054421425,
"step": 535
},
{
"calibration/aurc": 0.13244185631787503,
"calibration/batch_distribution_entropy": 0.9800974535080833,
"calibration/buffer_distribution_entropy": 0.9808648778370616,
"calibration/confidence_entropy": 0.4809548343091509,
"calibration/coverage@0%": 0.14100108636490322,
"calibration/coverage@1%": 0.1484478948755415,
"calibration/coverage@10%": 0.490198095221616,
"calibration/coverage@15%": 0.6693744131198277,
"calibration/coverage@20%": 0.7559760305725088,
"calibration/coverage@25%": 0.8143260634639944,
"calibration/coverage@30%": 0.8785299372239944,
"calibration/coverage@5%": 0.3187934513805214,
"calibration/ece": 0.17710937135755084,
"calibration/mean_confidence": 0.4955213287013769,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.020572916666666673,
"completions/max_length": 3891.2,
"completions/max_terminated_length": 3891.2,
"completions/mean_length": 809.2749145507812,
"completions/mean_terminated_length": 826.44189453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 93.8,
"epoch": 1.2975962800464993,
"grad_norm": 0.0025462766643613577,
"learning_rate": 3.0048076923076923e-06,
"loss": -0.0439,
"num_tokens": 1399767973.0,
"reward": 1.0000099539756775,
"reward_std": 0.1379528284072876,
"rewards/accuracy_reward": 0.7075520634651185,
"rewards/brier_reward": 0.8036871314048767,
"rewards/confidence_uniqueness_reward": 0.9327346086502075,
"rewards/format_reward": 0.9793402791023255,
"rewards/frontier_coverage_0": 0.017869478557258844,
"rewards/frontier_coverage_1": 0.017869478557258844,
"rewards/frontier_coverage_10": 0.0430420383810997,
"rewards/frontier_coverage_15": 0.09954206198453903,
"rewards/frontier_coverage_20": 0.17041370272636414,
"rewards/frontier_coverage_25": 0.25180783569812776,
"rewards/frontier_coverage_5": 0.018168425746262075,
"rewards/frontier_entropy_batch_reward": -0.2592599123716354,
"signal/accuracy_reward/centered_abs_mean": 0.14659830629825593,
"signal/accuracy_reward/group_std_mean": 0.1926487445831299,
"signal/accuracy_reward/group_zero_std_frac": 0.45277778506278993,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.014528787136078,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07329915314912797,
"signal/advantage_abs_mean": 0.7407657027244567,
"signal/advantage_pre_scale_abs_mean": 0.10063390731811524,
"signal/advantage_pre_scale_std": 0.17158576846122742,
"signal/advantage_std": 0.9831032276153564,
"signal/brier_reward/centered_abs_mean": 0.134641233086586,
"signal/brier_reward/group_std_mean": 0.17501663267612458,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18607415556907653,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013464123010635376,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.042544426023960115,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07108568400144577,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.058684717118740085,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004254442779347301,
"signal/format_reward/centered_abs_mean": 0.03412543423473835,
"signal/format_reward/group_std_mean": 0.06144065707921982,
"signal/format_reward/group_zero_std_frac": 0.7555555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.23496909141540528,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.017062717117369175,
"signal/frontier_coverage_0/centered_abs_mean": 0.19530532956123353,
"signal/frontier_coverage_0/group_std_mean": 0.25145215094089507,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03858583122491836,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002792866202071309,
"signal/frontier_coverage_1/centered_abs_mean": 0.19530532956123353,
"signal/frontier_coverage_1/group_std_mean": 0.25145215094089507,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03858583122491836,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002792866202071309,
"signal/frontier_coverage_10/centered_abs_mean": 0.07514613270759582,
"signal/frontier_coverage_10/group_std_mean": 0.09620828628540039,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014837125316262245,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001074589730706066,
"signal/frontier_coverage_15/centered_abs_mean": 0.07365463823080062,
"signal/frontier_coverage_15/group_std_mean": 0.09266743957996368,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014589322358369827,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010532613145187498,
"signal/frontier_coverage_20/centered_abs_mean": 0.10024979412555694,
"signal/frontier_coverage_20/group_std_mean": 0.12663647830486296,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01986866146326065,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014335720334202051,
"signal/frontier_coverage_25/centered_abs_mean": 0.1359792798757553,
"signal/frontier_coverage_25/group_std_mean": 0.17195332646369935,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02694905437529087,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019445037469267845,
"signal/frontier_coverage_5/centered_abs_mean": 0.19388082921504973,
"signal/frontier_coverage_5/group_std_mean": 0.24967800378799437,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03830417841672897,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002772495849058032,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31604220271110534,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38787181973457335,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.43719064593315127,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03160422146320343,
"step": 540
},
{
"calibration/aurc": 0.19371810539249967,
"calibration/batch_distribution_entropy": 0.9582393810078139,
"calibration/buffer_distribution_entropy": 0.9807477018011488,
"calibration/confidence_entropy": 0.48479958020192343,
"calibration/coverage@0%": 0.009410442493643909,
"calibration/coverage@1%": 0.009410442493643909,
"calibration/coverage@10%": 0.2788743955445861,
"calibration/coverage@15%": 0.41693663961923677,
"calibration/coverage@20%": 0.5085180756830296,
"calibration/coverage@25%": 0.7402995306519997,
"calibration/coverage@30%": 0.8931141066789896,
"calibration/coverage@5%": 0.10086368189135078,
"calibration/ece": 0.12323604987285137,
"calibration/mean_confidence": 0.5889928364967711,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.007204861111111116,
"completions/max_length": 3744.0,
"completions/max_terminated_length": 3744.0,
"completions/mean_length": 740.11953125,
"completions/mean_terminated_length": 745.5853881835938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 137.0,
"epoch": 1.3095961300483743,
"grad_norm": 0.002864128677174449,
"learning_rate": 2.974759615384616e-06,
"loss": -0.0146,
"num_tokens": 1411338950.0,
"reward": 0.9936476588249207,
"reward_std": 0.12532853931188584,
"rewards/accuracy_reward": 0.6754340291023254,
"rewards/brier_reward": 0.8261975884437561,
"rewards/confidence_uniqueness_reward": 0.9434968113899231,
"rewards/format_reward": 0.9927083373069763,
"rewards/frontier_coverage_0": 0.04778345115482807,
"rewards/frontier_coverage_1": 0.04778345115482807,
"rewards/frontier_coverage_10": 0.05499168708920479,
"rewards/frontier_coverage_15": 0.10132022351026534,
"rewards/frontier_coverage_20": 0.1685120642185211,
"rewards/frontier_coverage_25": 0.244861963391304,
"rewards/frontier_coverage_5": 0.047942586988210675,
"rewards/frontier_entropy_batch_reward": -0.27591673135757444,
"signal/accuracy_reward/centered_abs_mean": 0.14527452290058135,
"signal/accuracy_reward/group_std_mean": 0.19745134711265563,
"signal/accuracy_reward/group_zero_std_frac": 0.4083333432674408,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0105983018875122,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07263726145029067,
"signal/advantage_abs_mean": 0.7285477638244628,
"signal/advantage_pre_scale_abs_mean": 0.09042828679084777,
"signal/advantage_pre_scale_std": 0.14941135048866272,
"signal/advantage_std": 0.9830885171890259,
"signal/brier_reward/centered_abs_mean": 0.11565537899732589,
"signal/brier_reward/group_std_mean": 0.15386753976345063,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16175343692302704,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011565538495779038,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025201234966516495,
"signal/confidence_uniqueness_reward/group_std_mean": 0.044658108800649646,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03534325771033764,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025201234966516495,
"signal/format_reward/centered_abs_mean": 0.013400607462972402,
"signal/format_reward/group_std_mean": 0.03037625327706337,
"signal/format_reward/group_zero_std_frac": 0.8555555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09462228938937187,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006700303731486201,
"signal/frontier_coverage_0/centered_abs_mean": 0.16465333700180054,
"signal/frontier_coverage_0/group_std_mean": 0.21388448178768157,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.032869096100330356,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002354542585089803,
"signal/frontier_coverage_1/centered_abs_mean": 0.16465333700180054,
"signal/frontier_coverage_1/group_std_mean": 0.21388448178768157,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.032869096100330356,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002354542585089803,
"signal/frontier_coverage_10/centered_abs_mean": 0.06270370185375214,
"signal/frontier_coverage_10/group_std_mean": 0.08065424412488938,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012526192888617515,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008966629044152796,
"signal/frontier_coverage_15/centered_abs_mean": 0.07721384763717651,
"signal/frontier_coverage_15/group_std_mean": 0.09745251387357712,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015420524403452873,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011041580000892282,
"signal/frontier_coverage_20/centered_abs_mean": 0.11166613698005676,
"signal/frontier_coverage_20/group_std_mean": 0.14150880575180053,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022289788722991942,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001596825709566474,
"signal/frontier_coverage_25/centered_abs_mean": 0.15288293361663818,
"signal/frontier_coverage_25/group_std_mean": 0.19415551722049712,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03050895407795906,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021862258901819585,
"signal/frontier_coverage_5/centered_abs_mean": 0.1633177638053894,
"signal/frontier_coverage_5/group_std_mean": 0.21221773326396942,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032602763175964354,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002335443953052163,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3143535256385803,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3824365258216858,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4391070544719696,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03143535330891609,
"step": 545
},
{
"calibration/aurc": 0.0995656417360469,
"calibration/batch_distribution_entropy": 0.938787714013453,
"calibration/buffer_distribution_entropy": 0.9802957592257414,
"calibration/confidence_entropy": 0.4762113632355807,
"calibration/coverage@0%": 0.06886015541822894,
"calibration/coverage@1%": 0.20479765541822897,
"calibration/coverage@10%": 0.6694310362170939,
"calibration/coverage@15%": 0.7894442341243252,
"calibration/coverage@20%": 0.8726531096446104,
"calibration/coverage@25%": 0.9322158247251984,
"calibration/coverage@30%": 0.9687002652519894,
"calibration/coverage@5%": 0.33284931091946046,
"calibration/ece": 0.13048564570174642,
"calibration/mean_confidence": 0.6237522141039131,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003993055555555536,
"completions/max_length": 3780.4,
"completions/max_terminated_length": 3780.4,
"completions/mean_length": 732.2462646484375,
"completions/mean_terminated_length": 735.1727905273438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 107.4,
"epoch": 1.3215959800502493,
"grad_norm": 0.002857534447684884,
"learning_rate": 2.9447115384615386e-06,
"loss": -0.0009,
"num_tokens": 1422844827.0,
"reward": 1.0154416918754579,
"reward_std": 0.11865588426589965,
"rewards/accuracy_reward": 0.723437511920929,
"rewards/brier_reward": 0.8295330762863159,
"rewards/confidence_uniqueness_reward": 0.944782269001007,
"rewards/format_reward": 0.9960069417953491,
"rewards/frontier_coverage_0": 0.014874590956605972,
"rewards/frontier_coverage_1": 0.014878203091211618,
"rewards/frontier_coverage_10": 0.04686977192759514,
"rewards/frontier_coverage_15": 0.10758093893527984,
"rewards/frontier_coverage_20": 0.1838034689426422,
"rewards/frontier_coverage_25": 0.26971648037433626,
"rewards/frontier_coverage_5": 0.01526981797069311,
"rewards/frontier_entropy_batch_reward": -0.3104989051818848,
"signal/accuracy_reward/centered_abs_mean": 0.1535481721162796,
"signal/accuracy_reward/group_std_mean": 0.19500392973423003,
"signal/accuracy_reward/group_zero_std_frac": 0.46666666865348816,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.110567831993103,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0767740860581398,
"signal/advantage_abs_mean": 0.7761712193489074,
"signal/advantage_pre_scale_abs_mean": 0.0919352874159813,
"signal/advantage_pre_scale_std": 0.143514946103096,
"signal/advantage_std": 0.9830339431762696,
"signal/brier_reward/centered_abs_mean": 0.11515444964170456,
"signal/brier_reward/group_std_mean": 0.14821325838565827,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16740451157093048,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011515445262193679,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02092781737446785,
"signal/confidence_uniqueness_reward/group_std_mean": 0.033663921803236005,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03046945817768574,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002092781872488558,
"signal/format_reward/centered_abs_mean": 0.007443576492369175,
"signal/format_reward/group_std_mean": 0.01733107175678015,
"signal/format_reward/group_zero_std_frac": 0.9166666865348816,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05412430316209793,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0037217882461845877,
"signal/frontier_coverage_0/centered_abs_mean": 0.16963888108730316,
"signal/frontier_coverage_0/group_std_mean": 0.21692575812339782,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03522733971476555,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024258360732346772,
"signal/frontier_coverage_1/centered_abs_mean": 0.16954942643642426,
"signal/frontier_coverage_1/group_std_mean": 0.2168179452419281,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035208532214164735,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002424556901678443,
"signal/frontier_coverage_10/centered_abs_mean": 0.06202037930488587,
"signal/frontier_coverage_10/group_std_mean": 0.07880822569131851,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012900578789412975,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008868913981132209,
"signal/frontier_coverage_15/centered_abs_mean": 0.07617910951375961,
"signal/frontier_coverage_15/group_std_mean": 0.0960146278142929,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015834695287048818,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010893612634390593,
"signal/frontier_coverage_20/centered_abs_mean": 0.10935810059309006,
"signal/frontier_coverage_20/group_std_mean": 0.1381034791469574,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022717427089810373,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015638208482414485,
"signal/frontier_coverage_25/centered_abs_mean": 0.14982065558433533,
"signal/frontier_coverage_25/group_std_mean": 0.18926362693309784,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.031108209863305092,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021424354054033756,
"signal/frontier_coverage_5/centered_abs_mean": 0.1681896448135376,
"signal/frontier_coverage_5/group_std_mean": 0.2151541143655777,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03492706418037415,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024051119573414324,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32938060760498045,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3938984632492065,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4788420915603638,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032938060164451596,
"step": 550
},
{
"epoch": 1.3215959800502493,
"eval_calibration/aurc": 0.12777494995730462,
"eval_calibration/batch_distribution_entropy": 0.9146714874440477,
"eval_calibration/buffer_distribution_entropy": 0.97970379700712,
"eval_calibration/confidence_entropy": 0.4720832389869453,
"eval_calibration/coverage@0%": 0.2565524193548387,
"eval_calibration/coverage@1%": 0.2565524193548387,
"eval_calibration/coverage@10%": 0.5593077956989247,
"eval_calibration/coverage@15%": 0.679771505376344,
"eval_calibration/coverage@20%": 0.7795698924731184,
"eval_calibration/coverage@25%": 0.9163306451612904,
"eval_calibration/coverage@30%": 0.9739583333333334,
"eval_calibration/coverage@5%": 0.2878024193548387,
"eval_calibration/ece": 0.19415833333333332,
"eval_calibration/mean_confidence": 0.5893913306451612,
"eval_completions/clipped_ratio": 0.002604166666666685,
"eval_completions/max_length": 2354.5,
"eval_completions/max_terminated_length": 2354.5,
"eval_completions/mean_length": 758.33837890625,
"eval_completions/mean_terminated_length": 760.3116963704427,
"eval_completions/min_length": 99.83333333333333,
"eval_completions/min_terminated_length": 185.16666666666666,
"eval_loss": 0.0,
"eval_num_tokens": 1422844827.0,
"eval_reward": 0.9168184598286947,
"eval_reward_std": 0.23817753295103708,
"eval_rewards/accuracy_reward": 0.6710069477558136,
"eval_rewards/brier_reward": 0.830171674489975,
"eval_rewards/confidence_uniqueness_reward": 0.8910415371259054,
"eval_rewards/format_reward": 0.9973958432674408,
"eval_rewards/frontier_coverage_0": 0.04973413205395142,
"eval_rewards/frontier_coverage_1": 0.049713826117416225,
"eval_rewards/frontier_coverage_10": 0.051547558357318245,
"eval_rewards/frontier_coverage_15": 0.10180203368266423,
"eval_rewards/frontier_coverage_20": 0.1691055049498876,
"eval_rewards/frontier_coverage_25": 0.24416544288396835,
"eval_rewards/frontier_coverage_5": 0.049687957080701985,
"eval_rewards/frontier_entropy_batch_reward": -0.9973958432674408,
"eval_runtime": 166.8536,
"eval_samples_per_second": 5.993,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4271375884612401,
"eval_signal/accuracy_reward/group_std_mean": 0.46825483938058216,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9032614231109619,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21356879423062006,
"eval_signal/advantage_abs_mean": 0.8906104365984598,
"eval_signal/advantage_pre_scale_abs_mean": 0.21314153323570886,
"eval_signal/advantage_pre_scale_std": 0.23594039926926294,
"eval_signal/advantage_std": 0.9864058494567871,
"eval_signal/brier_reward/centered_abs_mean": 0.16124566892782846,
"eval_signal/brier_reward/group_std_mean": 0.21817840884129205,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0682522679368655,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01612456701695919,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04524739272892475,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.060582934568325676,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019175103555123012,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004524739536767204,
"eval_signal/format_reward/centered_abs_mean": 0.0050455727614462376,
"eval_signal/format_reward/group_std_mean": 0.014731391333043575,
"eval_signal/format_reward/group_zero_std_frac": 0.9166666865348816,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.010400833562016487,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0025227863807231188,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.2642584939797719,
"eval_signal/frontier_coverage_0/group_std_mean": 0.36403438945611316,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.016022339463233948,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037788964497546353,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.26409488171339035,
"eval_signal/frontier_coverage_1/group_std_mean": 0.36384013791879016,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.016012390454610188,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003776557006252309,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.0783597007393837,
"eval_signal/frontier_coverage_10/group_std_mean": 0.10474599276979764,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004751801490783691,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011205436894670129,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.12035289034247398,
"eval_signal/frontier_coverage_15/group_std_mean": 0.1514952356616656,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007285447558388114,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001721046263507257,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.20810386538505554,
"eval_signal/frontier_coverage_20/group_std_mean": 0.2537065049012502,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012596863321959972,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029758852906525135,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.3042859335740407,
"eval_signal/frontier_coverage_25/group_std_mean": 0.366447314620018,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018416117566327255,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004351288701097171,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2619953279693921,
"eval_signal/frontier_coverage_5/group_std_mean": 0.361324484149615,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.015885391427824896,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003746533145507177,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0050455727614462376,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.014731391333043575,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9166666865348816,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0020801667124032974,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0005045572955471774,
"eval_steps_per_second": 0.036,
"step": 550
},
{
"epoch": 1.3215959800502493,
"step": 550,
"train_probe_calibration/aurc": 0.17908947204745143,
"train_probe_calibration/batch_distribution_entropy": 0.9130333424554435,
"train_probe_calibration/buffer_distribution_entropy": 0.9798709378353916,
"train_probe_calibration/confidence_entropy": 0.49644389175607556,
"train_probe_calibration/coverage@0%": 0.17361111111111113,
"train_probe_calibration/coverage@1%": 0.17361111111111113,
"train_probe_calibration/coverage@10%": 0.38472222222222224,
"train_probe_calibration/coverage@15%": 0.6371527777777778,
"train_probe_calibration/coverage@20%": 0.7107638888888889,
"train_probe_calibration/coverage@25%": 0.8312499999999999,
"train_probe_calibration/coverage@30%": 0.9409722222222222,
"train_probe_calibration/coverage@5%": 0.2152777777777778,
"train_probe_calibration/ece": 0.20402147569444443,
"train_probe_calibration/mean_confidence": 0.5926106076388887,
"train_probe_completions/clipped_ratio": 0.0026041666666666665,
"train_probe_completions/max_length": 2099.6666666666665,
"train_probe_completions/max_terminated_length": 2099.6666666666665,
"train_probe_completions/mean_length": 734.828135172526,
"train_probe_completions/mean_terminated_length": 736.7092793782552,
"train_probe_completions/min_length": 105.83333333333333,
"train_probe_completions/min_terminated_length": 148.83333333333334,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 1422844827.0,
"train_probe_reward": 0.9271653195222219,
"train_probe_reward_std": 0.23420592894156775,
"train_probe_rewards/accuracy_reward": 0.6935763955116272,
"train_probe_rewards/brier_reward": 0.8282076120376587,
"train_probe_rewards/confidence_uniqueness_reward": 0.8932102719942728,
"train_probe_rewards/format_reward": 0.996527781089147,
"train_probe_rewards/frontier_coverage_0": 0.03307745155567924,
"train_probe_rewards/frontier_coverage_1": 0.033096089803924165,
"train_probe_rewards/frontier_coverage_10": 0.04708883538842201,
"train_probe_rewards/frontier_coverage_15": 0.10199795787533124,
"train_probe_rewards/frontier_coverage_20": 0.17264153808355331,
"train_probe_rewards/frontier_coverage_25": 0.2519413009285927,
"train_probe_rewards/frontier_coverage_5": 0.03317807226752242,
"train_probe_rewards/frontier_entropy_batch_reward": -0.996527781089147,
"train_probe_runtime": 161.636,
"train_probe_samples_per_second": 6.187,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4127061615387599,
"train_probe_signal/accuracy_reward/group_std_mean": 0.4601968179146449,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8917946914831797,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20635308076937994,
"train_probe_signal/advantage_abs_mean": 0.8753375907739004,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.20616419365008673,
"train_probe_signal/advantage_pre_scale_std": 0.2326151430606842,
"train_probe_signal/advantage_std": 0.9863971670468649,
"train_probe_signal/brier_reward/centered_abs_mean": 0.16185809671878815,
"train_probe_signal/brier_reward/group_std_mean": 0.2190844938158989,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07006527669727802,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.016185809237261612,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04507234009603659,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.05927520431578159,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01946852883944909,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045072339319934445,
"train_probe_signal/format_reward/centered_abs_mean": 0.006618923507630825,
"train_probe_signal/format_reward/group_std_mean": 0.01665244624018669,
"train_probe_signal/format_reward/group_zero_std_frac": 0.9166666865348816,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013900812404851118,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0033094617538154125,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.2572428708275159,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.36240187784036,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015925037519385416,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036785730238383016,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2570945918560028,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.3622182110945384,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01591583030919234,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003676452557556331,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.07643753911058108,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.1038547232747078,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004740113392472267,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010930567999215175,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.11760762209693591,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.14834488679965338,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0072829612375547486,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016817889603165288,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.20278030882279077,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.247420996427536,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012549723964184523,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002899758517742157,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.29607370992501575,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.35755864282449085,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018319410582383473,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004233853969102104,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.2552005996306737,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.3598967989285787,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.015798571209112804,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036493684941281876,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.006618923507630825,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.01665244624018669,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9166666865348816,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0027801623412718377,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0006618923507630825,
"train_probe_steps_per_second": 0.037
},
{
"calibration/aurc": 0.19663234235869248,
"calibration/batch_distribution_entropy": 0.9651647751097935,
"calibration/buffer_distribution_entropy": 0.979815394719828,
"calibration/confidence_entropy": 0.4981850665533334,
"calibration/coverage@0%": 0.019349941668759134,
"calibration/coverage@1%": 0.019349941668759134,
"calibration/coverage@10%": 0.3412770994600119,
"calibration/coverage@15%": 0.5117494931015768,
"calibration/coverage@20%": 0.5975047467954091,
"calibration/coverage@25%": 0.6728722988900216,
"calibration/coverage@30%": 0.7757951279144336,
"calibration/coverage@5%": 0.06757603701182885,
"calibration/ece": 0.14719846197953987,
"calibration/mean_confidence": 0.5519945923747531,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003298611111111116,
"completions/max_length": 3510.8,
"completions/max_terminated_length": 3510.8,
"completions/mean_length": 757.6384521484375,
"completions/mean_terminated_length": 760.1795532226563,
"completions/min_length": 0.0,
"completions/min_terminated_length": 145.0,
"epoch": 1.3335958300521242,
"grad_norm": 0.0024476582184433937,
"learning_rate": 2.9146634615384615e-06,
"loss": -0.0037,
"num_tokens": 1434677462.0,
"reward": 1.0103063583374023,
"reward_std": 0.10284363478422165,
"rewards/accuracy_reward": 0.7134548544883728,
"rewards/brier_reward": 0.8260629892349243,
"rewards/confidence_uniqueness_reward": 0.9457016348838806,
"rewards/format_reward": 0.9967013835906983,
"rewards/frontier_coverage_0": 0.023881956841796635,
"rewards/frontier_coverage_1": 0.023881740309298037,
"rewards/frontier_coverage_10": 0.048338998854160306,
"rewards/frontier_coverage_15": 0.10219060182571411,
"rewards/frontier_coverage_20": 0.1739596724510193,
"rewards/frontier_coverage_25": 0.2564005136489868,
"rewards/frontier_coverage_5": 0.024192382209002973,
"rewards/frontier_entropy_batch_reward": -0.31283934116363527,
"signal/accuracy_reward/centered_abs_mean": 0.12223849892616272,
"signal/accuracy_reward/group_std_mean": 0.1589464396238327,
"signal/accuracy_reward/group_zero_std_frac": 0.55277778506279,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9898079633712769,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06111924946308136,
"signal/advantage_abs_mean": 0.7705781579017639,
"signal/advantage_pre_scale_abs_mean": 0.07884201258420945,
"signal/advantage_pre_scale_std": 0.1269924134016037,
"signal/advantage_std": 0.9828511595726013,
"signal/brier_reward/centered_abs_mean": 0.11215368509292603,
"signal/brier_reward/group_std_mean": 0.1447371155023575,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18304523229598998,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011215368844568729,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01934829242527485,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02940821126103401,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03181086927652359,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019348292844370008,
"signal/format_reward/centered_abs_mean": 0.0060004339087754485,
"signal/format_reward/group_std_mean": 0.012914158403873444,
"signal/format_reward/group_zero_std_frac": 0.9416666865348816,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.04921326451003551,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0030002169543877242,
"signal/frontier_coverage_0/centered_abs_mean": 0.16506983935832978,
"signal/frontier_coverage_0/group_std_mean": 0.2103798657655716,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03838563859462738,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023604985792189835,
"signal/frontier_coverage_1/centered_abs_mean": 0.1649801552295685,
"signal/frontier_coverage_1/group_std_mean": 0.21026895344257354,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.038364893198013304,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002359216194599867,
"signal/frontier_coverage_10/centered_abs_mean": 0.06287136897444726,
"signal/frontier_coverage_10/group_std_mean": 0.07955214679241181,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014653045125305653,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008990605594590307,
"signal/frontier_coverage_15/centered_abs_mean": 0.07382966876029969,
"signal/frontier_coverage_15/group_std_mean": 0.09216237664222718,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01734896432608366,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010557642672210932,
"signal/frontier_coverage_20/centered_abs_mean": 0.10129383057355881,
"signal/frontier_coverage_20/group_std_mean": 0.12719354182481765,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023834266886115073,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014485017396509647,
"signal/frontier_coverage_25/centered_abs_mean": 0.13603402376174928,
"signal/frontier_coverage_25/group_std_mean": 0.17175181806087494,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.031987834721803665,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019452865933999419,
"signal/frontier_coverage_5/centered_abs_mean": 0.1638639748096466,
"signal/frontier_coverage_5/group_std_mean": 0.20887594819068908,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03810485303401947,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023432548390701414,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3287335276603699,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3981877684593201,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5402589082717896,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03287335261702538,
"step": 555
},
{
"calibration/aurc": 0.12372041385720985,
"calibration/batch_distribution_entropy": 0.9359188147593194,
"calibration/buffer_distribution_entropy": 0.9801759788746939,
"calibration/confidence_entropy": 0.46966744545122846,
"calibration/coverage@0%": 0.022477618761264504,
"calibration/coverage@1%": 0.07574132633306607,
"calibration/coverage@10%": 0.5220841856664131,
"calibration/coverage@15%": 0.6662842303817766,
"calibration/coverage@20%": 0.8679514517074074,
"calibration/coverage@25%": 0.9368390992167102,
"calibration/coverage@30%": 0.9577023498694517,
"calibration/coverage@5%": 0.22553054521905386,
"calibration/ece": 0.1454074541603872,
"calibration/mean_confidence": 0.620010325011976,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003993055555555558,
"completions/max_length": 3796.8,
"completions/max_terminated_length": 3796.8,
"completions/mean_length": 820.9757080078125,
"completions/mean_terminated_length": 824.318359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 143.0,
"epoch": 1.3455956800539992,
"grad_norm": 0.0024596690200269222,
"learning_rate": 2.8846153846153845e-06,
"loss": 0.0007,
"num_tokens": 1447224798.0,
"reward": 1.016961658000946,
"reward_std": 0.1075833186507225,
"rewards/accuracy_reward": 0.7207465171813965,
"rewards/brier_reward": 0.8354118824005127,
"rewards/confidence_uniqueness_reward": 0.945723009109497,
"rewards/format_reward": 0.9960069537162781,
"rewards/frontier_coverage_0": 0.031797058135271075,
"rewards/frontier_coverage_1": 0.03180254213511944,
"rewards/frontier_coverage_10": 0.05350678041577339,
"rewards/frontier_coverage_15": 0.11030341684818268,
"rewards/frontier_coverage_20": 0.1851776123046875,
"rewards/frontier_coverage_25": 0.2708742439746857,
"rewards/frontier_coverage_5": 0.0322908416390419,
"rewards/frontier_entropy_batch_reward": -0.29763842225074766,
"signal/accuracy_reward/centered_abs_mean": 0.1304633229970932,
"signal/accuracy_reward/group_std_mean": 0.1730515480041504,
"signal/accuracy_reward/group_zero_std_frac": 0.5000000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0257725954055785,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0652316614985466,
"signal/advantage_abs_mean": 0.7648744463920594,
"signal/advantage_pre_scale_abs_mean": 0.08146732598543167,
"signal/advantage_pre_scale_std": 0.1303176298737526,
"signal/advantage_std": 0.982902467250824,
"signal/brier_reward/centered_abs_mean": 0.11421165615320206,
"signal/brier_reward/group_std_mean": 0.14783188402652742,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18043694496154786,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01142116542905569,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02011672258377075,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030361873283982276,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03180941939353943,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020116724306717517,
"signal/format_reward/centered_abs_mean": 0.006575520941987633,
"signal/format_reward/group_std_mean": 0.01385612040758133,
"signal/format_reward/group_zero_std_frac": 0.9361111283302307,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05119709745049476,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0032877604709938167,
"signal/frontier_coverage_0/centered_abs_mean": 0.1728944033384323,
"signal/frontier_coverage_0/group_std_mean": 0.22030304074287416,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03905735611915588,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002472389955073595,
"signal/frontier_coverage_1/centered_abs_mean": 0.172818061709404,
"signal/frontier_coverage_1/group_std_mean": 0.22020695805549623,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03904041945934296,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002471298351883888,
"signal/frontier_coverage_10/centered_abs_mean": 0.06365747526288032,
"signal/frontier_coverage_10/group_std_mean": 0.08029326051473618,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014403184317052365,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009103018906898797,
"signal/frontier_coverage_15/centered_abs_mean": 0.07621516734361648,
"signal/frontier_coverage_15/group_std_mean": 0.09550768882036209,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017270967923104764,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010898768436163665,
"signal/frontier_coverage_20/centered_abs_mean": 0.10459608137607575,
"signal/frontier_coverage_20/group_std_mean": 0.13220926523208618,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02368568480014801,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001495723961852491,
"signal/frontier_coverage_25/centered_abs_mean": 0.13965673446655275,
"signal/frontier_coverage_25/group_std_mean": 0.17762815058231354,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.031598026677966115,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019970911787822844,
"signal/frontier_coverage_5/centered_abs_mean": 0.17128887474536897,
"signal/frontier_coverage_5/group_std_mean": 0.2183428555727005,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.038694722950458525,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024494309443980457,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3275206506252289,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.393954998254776,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5186434030532837,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032752066105604175,
"step": 560
},
{
"calibration/aurc": 0.11484324392938003,
"calibration/batch_distribution_entropy": 0.9461764735370636,
"calibration/buffer_distribution_entropy": 0.9800952762111569,
"calibration/confidence_entropy": 0.49656747207777396,
"calibration/coverage@0%": 0.13764694219018028,
"calibration/coverage@1%": 0.14077194219018027,
"calibration/coverage@10%": 0.34883286118752554,
"calibration/coverage@15%": 0.8183734403223412,
"calibration/coverage@20%": 0.944526908026836,
"calibration/coverage@25%": 0.9843204977967833,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.1814599271535877,
"calibration/ece": 0.18485625489936197,
"calibration/mean_confidence": 0.6121328205199128,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004947916666666652,
"completions/max_length": 3722.8,
"completions/max_terminated_length": 3722.8,
"completions/mean_length": 829.382568359375,
"completions/mean_terminated_length": 833.634375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 143.0,
"epoch": 1.3575955300558742,
"grad_norm": 0.0024535465054214,
"learning_rate": 2.8545673076923082e-06,
"loss": -0.011,
"num_tokens": 1459857813.0,
"reward": 1.01730078458786,
"reward_std": 0.10536360442638397,
"rewards/accuracy_reward": 0.7284722328186035,
"rewards/brier_reward": 0.8310370564460754,
"rewards/confidence_uniqueness_reward": 0.9439226388931274,
"rewards/format_reward": 0.9947048664093018,
"rewards/frontier_coverage_0": 0.017816638201475145,
"rewards/frontier_coverage_1": 0.017861245200037956,
"rewards/frontier_coverage_10": 0.047358321771025655,
"rewards/frontier_coverage_15": 0.10612278282642365,
"rewards/frontier_coverage_20": 0.18106147646903992,
"rewards/frontier_coverage_25": 0.2659234404563904,
"rewards/frontier_coverage_5": 0.018120815977454185,
"rewards/frontier_entropy_batch_reward": -0.3113971471786499,
"signal/accuracy_reward/centered_abs_mean": 0.11510416865348816,
"signal/accuracy_reward/group_std_mean": 0.1561041682958603,
"signal/accuracy_reward/group_zero_std_frac": 0.5361111223697662,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8854737877845764,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05755208432674408,
"signal/advantage_abs_mean": 0.7564275026321411,
"signal/advantage_pre_scale_abs_mean": 0.07891413271427154,
"signal/advantage_pre_scale_std": 0.12910031527280807,
"signal/advantage_std": 0.9829351425170898,
"signal/brier_reward/centered_abs_mean": 0.10529440641403198,
"signal/brier_reward/group_std_mean": 0.13557455837726592,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16216840744018554,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010529440827667713,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02240469716489315,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03349938876926899,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.034209462255239485,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022404698422178624,
"signal/format_reward/centered_abs_mean": 0.009195963526144624,
"signal/format_reward/group_std_mean": 0.01742637250572443,
"signal/format_reward/group_zero_std_frac": 0.9277777671813965,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06746506839990615,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004597981763072312,
"signal/frontier_coverage_0/centered_abs_mean": 0.15199373960494994,
"signal/frontier_coverage_0/group_std_mean": 0.19706369042396546,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.033572905138134955,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021735104732215405,
"signal/frontier_coverage_1/centered_abs_mean": 0.15193078815937042,
"signal/frontier_coverage_1/group_std_mean": 0.19698142111301423,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03355920016765594,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021726103965193032,
"signal/frontier_coverage_10/centered_abs_mean": 0.05758165866136551,
"signal/frontier_coverage_10/group_std_mean": 0.07300378382205963,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012791383638978004,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008234177017584443,
"signal/frontier_coverage_15/centered_abs_mean": 0.07520065009593964,
"signal/frontier_coverage_15/group_std_mean": 0.0933651715517044,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016745933331549168,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010753692826256157,
"signal/frontier_coverage_20/centered_abs_mean": 0.10440513789653778,
"signal/frontier_coverage_20/group_std_mean": 0.13062019646167755,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023217468336224557,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014929935336112977,
"signal/frontier_coverage_25/centered_abs_mean": 0.13932709842920304,
"signal/frontier_coverage_25/group_std_mean": 0.1753379374742508,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030945189669728278,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001992377499118447,
"signal/frontier_coverage_5/centered_abs_mean": 0.15076255798339844,
"signal/frontier_coverage_5/group_std_mean": 0.1955201655626297,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03330030217766762,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021559046115726234,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32999433279037477,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39727323651313784,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5139553189277649,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03299943394958973,
"step": 565
},
{
"calibration/aurc": 0.09963432050211549,
"calibration/batch_distribution_entropy": 0.980695428798073,
"calibration/buffer_distribution_entropy": 0.980368388300457,
"calibration/confidence_entropy": 0.471962507140043,
"calibration/coverage@0%": 0.1962892479198079,
"calibration/coverage@1%": 0.30380368012509074,
"calibration/coverage@10%": 0.6094211586251187,
"calibration/coverage@15%": 0.706083951108505,
"calibration/coverage@20%": 0.7958589867091082,
"calibration/coverage@25%": 0.8836823588540794,
"calibration/coverage@30%": 0.9326827497626626,
"calibration/coverage@5%": 0.48807606662199143,
"calibration/ece": 0.16809385986656594,
"calibration/mean_confidence": 0.5347295725900152,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008767361111111116,
"completions/max_length": 3635.4,
"completions/max_terminated_length": 3635.4,
"completions/mean_length": 893.3748413085938,
"completions/mean_terminated_length": 901.4503173828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 179.0,
"epoch": 1.3695953800577492,
"grad_norm": 0.0024552466347813606,
"learning_rate": 2.8245192307692307e-06,
"loss": -0.0139,
"num_tokens": 1473233363.0,
"reward": 1.006178867816925,
"reward_std": 0.11279452443122864,
"rewards/accuracy_reward": 0.7065972328186035,
"rewards/brier_reward": 0.8198230504989624,
"rewards/confidence_uniqueness_reward": 0.942148756980896,
"rewards/format_reward": 0.9907986044883728,
"rewards/frontier_coverage_0": 0.026942870020866393,
"rewards/frontier_coverage_1": 0.026963303238153456,
"rewards/frontier_coverage_10": 0.050035931169986725,
"rewards/frontier_coverage_15": 0.10418967604637146,
"rewards/frontier_coverage_20": 0.17468074858188629,
"rewards/frontier_coverage_25": 0.253691965341568,
"rewards/frontier_coverage_5": 0.027084483951330184,
"rewards/frontier_entropy_batch_reward": -0.282056000828743,
"signal/accuracy_reward/centered_abs_mean": 0.12428385615348816,
"signal/accuracy_reward/group_std_mean": 0.1653681844472885,
"signal/accuracy_reward/group_zero_std_frac": 0.5222222328186035,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9815640449523926,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06214192807674408,
"signal/advantage_abs_mean": 0.7617440342903137,
"signal/advantage_pre_scale_abs_mean": 0.08372949510812759,
"signal/advantage_pre_scale_std": 0.14188626110553743,
"signal/advantage_std": 0.9829033493995667,
"signal/brier_reward/centered_abs_mean": 0.11303541958332061,
"signal/brier_reward/group_std_mean": 0.148781681060791,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17894803285598754,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011303541995584965,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02599692568182945,
"signal/confidence_uniqueness_reward/group_std_mean": 0.042301306128501893,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.041353125125169754,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002599692577496171,
"signal/format_reward/centered_abs_mean": 0.01422526054084301,
"signal/format_reward/group_std_mean": 0.0282505813986063,
"signal/format_reward/group_zero_std_frac": 0.8805555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.11385444700717925,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007112630270421505,
"signal/frontier_coverage_0/centered_abs_mean": 0.16342126429080964,
"signal/frontier_coverage_0/group_std_mean": 0.2136039435863495,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03702799454331398,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002336924057453871,
"signal/frontier_coverage_1/centered_abs_mean": 0.1633344203233719,
"signal/frontier_coverage_1/group_std_mean": 0.21349417567253112,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03700846284627914,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023356821853667497,
"signal/frontier_coverage_10/centered_abs_mean": 0.062467949092388154,
"signal/frontier_coverage_10/group_std_mean": 0.07969342619180679,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014144686982035637,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008932916796766221,
"signal/frontier_coverage_15/centered_abs_mean": 0.07396685630083084,
"signal/frontier_coverage_15/group_std_mean": 0.09205316007137299,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01673112381249666,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010577260982245207,
"signal/frontier_coverage_20/centered_abs_mean": 0.10074280351400375,
"signal/frontier_coverage_20/group_std_mean": 0.12560753375291825,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02277975045144558,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014406220987439155,
"signal/frontier_coverage_25/centered_abs_mean": 0.1342226967215538,
"signal/frontier_coverage_25/group_std_mean": 0.16809515953063964,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03034891076385975,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019193845801055431,
"signal/frontier_coverage_5/centered_abs_mean": 0.16212076246738433,
"signal/frontier_coverage_5/group_std_mean": 0.21197022199630738,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03673520609736443,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023183269426226617,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3281023442745209,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39455527663230894,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5189752340316772,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03281023427844047,
"step": 570
},
{
"calibration/aurc": 0.11028525101347983,
"calibration/batch_distribution_entropy": 0.9566430480940576,
"calibration/buffer_distribution_entropy": 0.9804901972981563,
"calibration/confidence_entropy": 0.47551363461978974,
"calibration/coverage@0%": 0.11289720219401884,
"calibration/coverage@1%": 0.20973930745717673,
"calibration/coverage@10%": 0.6089289139266566,
"calibration/coverage@15%": 0.6945457885198006,
"calibration/coverage@20%": 0.7950429915606329,
"calibration/coverage@25%": 0.852454105806849,
"calibration/coverage@30%": 0.897395771365383,
"calibration/coverage@5%": 0.44135904537458914,
"calibration/ece": 0.1606364850593023,
"calibration/mean_confidence": 0.5495700747544723,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013020833333333325,
"completions/max_length": 3925.6,
"completions/max_terminated_length": 3925.6,
"completions/mean_length": 1019.6723388671875,
"completions/mean_terminated_length": 1033.4481079101563,
"completions/min_length": 0.0,
"completions/min_terminated_length": 215.8,
"epoch": 1.3815952300596241,
"grad_norm": 0.002250150078907609,
"learning_rate": 2.7944711538461537e-06,
"loss": -0.029,
"num_tokens": 1488053364.0,
"reward": 1.0047349095344544,
"reward_std": 0.12467042356729507,
"rewards/accuracy_reward": 0.7088541626930237,
"rewards/brier_reward": 0.8231329202651978,
"rewards/confidence_uniqueness_reward": 0.9376375079154968,
"rewards/format_reward": 0.9868923544883728,
"rewards/frontier_coverage_0": 0.02870071791112423,
"rewards/frontier_coverage_1": 0.028726204484701156,
"rewards/frontier_coverage_10": 0.05221306309103966,
"rewards/frontier_coverage_15": 0.10783710926771164,
"rewards/frontier_coverage_20": 0.17996051013469697,
"rewards/frontier_coverage_25": 0.25985406041145326,
"rewards/frontier_coverage_5": 0.02900756411254406,
"rewards/frontier_entropy_batch_reward": -0.29029480218887327,
"signal/accuracy_reward/centered_abs_mean": 0.12949218600988388,
"signal/accuracy_reward/group_std_mean": 0.1760840207338333,
"signal/accuracy_reward/group_zero_std_frac": 0.48888890743255614,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9506262302398681,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06474609300494194,
"signal/advantage_abs_mean": 0.7400188684463501,
"signal/advantage_pre_scale_abs_mean": 0.08855971843004226,
"signal/advantage_pre_scale_std": 0.15420872271060942,
"signal/advantage_std": 0.9829932928085328,
"signal/brier_reward/centered_abs_mean": 0.11222950965166092,
"signal/brier_reward/group_std_mean": 0.15040762722492218,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16671662628650666,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011222951300442218,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03410551249980927,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05924170911312103,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05008783340454102,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034105512779206036,
"signal/format_reward/centered_abs_mean": 0.023562283255159855,
"signal/format_reward/group_std_mean": 0.04704947955906391,
"signal/format_reward/group_zero_std_frac": 0.8,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1705361783504486,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011781141627579927,
"signal/frontier_coverage_0/centered_abs_mean": 0.16117251217365264,
"signal/frontier_coverage_0/group_std_mean": 0.211995929479599,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.034183626621961595,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023047670256346464,
"signal/frontier_coverage_1/centered_abs_mean": 0.16109153926372527,
"signal/frontier_coverage_1/group_std_mean": 0.2118909776210785,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03416657708585262,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002303608926013112,
"signal/frontier_coverage_10/centered_abs_mean": 0.06008915230631828,
"signal/frontier_coverage_10/group_std_mean": 0.07626638561487198,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012836772203445434,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008592748898081481,
"signal/frontier_coverage_15/centered_abs_mean": 0.07080269902944565,
"signal/frontier_coverage_15/group_std_mean": 0.08868333101272582,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015173521265387535,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010124785592779517,
"signal/frontier_coverage_20/centered_abs_mean": 0.09740875363349914,
"signal/frontier_coverage_20/group_std_mean": 0.12245101034641266,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020854856446385385,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001392945135012269,
"signal/frontier_coverage_25/centered_abs_mean": 0.13047325909137725,
"signal/frontier_coverage_25/group_std_mean": 0.16472874879837035,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027888312563300134,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018657675478607415,
"signal/frontier_coverage_5/centered_abs_mean": 0.16004208326339722,
"signal/frontier_coverage_5/group_std_mean": 0.2105341762304306,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03394476734101772,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002288601826876402,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3203289210796356,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3883971631526947,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4795925676822662,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032032891362905505,
"step": 575
},
{
"calibration/aurc": 0.09345142525102028,
"calibration/batch_distribution_entropy": 0.9630027341939942,
"calibration/buffer_distribution_entropy": 0.9820467419308796,
"calibration/confidence_entropy": 0.48253184391156057,
"calibration/coverage@0%": 0.15545568829074058,
"calibration/coverage@1%": 0.23777337060842285,
"calibration/coverage@10%": 0.6805700522287482,
"calibration/coverage@15%": 0.7847976560103878,
"calibration/coverage@20%": 0.8642587539374403,
"calibration/coverage@25%": 0.902133177056626,
"calibration/coverage@30%": 0.9395721925133689,
"calibration/coverage@5%": 0.5479745475143784,
"calibration/ece": 0.21899418926214792,
"calibration/mean_confidence": 0.5576070825955319,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.043315972222222235,
"completions/max_length": 3999.4,
"completions/max_terminated_length": 3999.4,
"completions/mean_length": 1091.949755859375,
"completions/mean_terminated_length": 1141.4026123046874,
"completions/min_length": 0.0,
"completions/min_terminated_length": 211.2,
"epoch": 1.3935950800614991,
"grad_norm": 0.002100614598020911,
"learning_rate": 2.7644230769230775e-06,
"loss": -0.114,
"num_tokens": 1503724433.0,
"reward": 0.9761302590370178,
"reward_std": 0.1780338317155838,
"rewards/accuracy_reward": 0.7019097208976746,
"rewards/brier_reward": 0.7873495817184448,
"rewards/confidence_uniqueness_reward": 0.9076419115066529,
"rewards/format_reward": 0.9567708492279052,
"rewards/frontier_coverage_0": 0.00892861601896584,
"rewards/frontier_coverage_1": 0.00892244540154934,
"rewards/frontier_coverage_10": 0.047091028094291686,
"rewards/frontier_coverage_15": 0.10459020435810089,
"rewards/frontier_coverage_20": 0.17623171508312224,
"rewards/frontier_coverage_25": 0.25542102158069613,
"rewards/frontier_coverage_5": 0.009725382318720222,
"rewards/frontier_entropy_batch_reward": -0.31445170640945436,
"signal/accuracy_reward/centered_abs_mean": 0.15259331464767456,
"signal/accuracy_reward/group_std_mean": 0.2111268609762192,
"signal/accuracy_reward/group_zero_std_frac": 0.3638888895511627,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8467071652412415,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07629665732383728,
"signal/advantage_abs_mean": 0.710084867477417,
"signal/advantage_pre_scale_abs_mean": 0.12264825254678727,
"signal/advantage_pre_scale_std": 0.21392209231853485,
"signal/advantage_std": 0.9833568692207336,
"signal/brier_reward/centered_abs_mean": 0.14704960882663726,
"signal/brier_reward/group_std_mean": 0.19571054577827454,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16416477262973786,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014704960770905018,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07570276707410813,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1249046117067337,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08455845564603806,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007570276688784361,
"signal/format_reward/centered_abs_mean": 0.0701388880610466,
"signal/format_reward/group_std_mean": 0.1196043387055397,
"signal/format_reward/group_zero_std_frac": 0.5527777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.39078201055526735,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0350694440305233,
"signal/frontier_coverage_0/centered_abs_mean": 0.17277185022830963,
"signal/frontier_coverage_0/group_std_mean": 0.2236211121082306,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.027412646636366843,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024706375785171985,
"signal/frontier_coverage_1/centered_abs_mean": 0.17267104387283325,
"signal/frontier_coverage_1/group_std_mean": 0.22349391877651215,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02739631161093712,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024691958911716937,
"signal/frontier_coverage_10/centered_abs_mean": 0.06385519728064537,
"signal/frontier_coverage_10/group_std_mean": 0.08039165139198304,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.010238087736070156,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009131293511018157,
"signal/frontier_coverage_15/centered_abs_mean": 0.07462313622236252,
"signal/frontier_coverage_15/group_std_mean": 0.0929687038064003,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01205196175724268,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010671108029782772,
"signal/frontier_coverage_20/centered_abs_mean": 0.10192661881446838,
"signal/frontier_coverage_20/group_std_mean": 0.12763854265213012,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016460489854216574,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014575506327673792,
"signal/frontier_coverage_25/centered_abs_mean": 0.1360788583755493,
"signal/frontier_coverage_25/group_std_mean": 0.17146177887916564,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.021944852918386458,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019459277391433716,
"signal/frontier_coverage_5/centered_abs_mean": 0.17097091376781465,
"signal/frontier_coverage_5/group_std_mean": 0.2213510900735855,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.027126950025558472,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002444884181022644,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34023687839508054,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4068700850009918,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3833127558231354,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03402368724346161,
"step": 580
},
{
"calibration/aurc": 0.17046566121396417,
"calibration/batch_distribution_entropy": 0.9610891477230847,
"calibration/buffer_distribution_entropy": 0.9820212064514543,
"calibration/confidence_entropy": 0.49697460263110704,
"calibration/coverage@0%": 0.11687982616119916,
"calibration/coverage@1%": 0.17290076856957615,
"calibration/coverage@10%": 0.3596662907420923,
"calibration/coverage@15%": 0.46583535249434255,
"calibration/coverage@20%": 0.6390530315041969,
"calibration/coverage@25%": 0.7664757895868467,
"calibration/coverage@30%": 0.8486663495586914,
"calibration/coverage@5%": 0.27098377467777857,
"calibration/ece": 0.13652749098142433,
"calibration/mean_confidence": 0.5919820919460504,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.028993055555555557,
"completions/max_length": 3882.8,
"completions/max_terminated_length": 3882.8,
"completions/mean_length": 1286.2006103515625,
"completions/mean_terminated_length": 1324.7329833984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 208.6,
"epoch": 1.405594930063374,
"grad_norm": 0.0021445208694785833,
"learning_rate": 2.7343750000000004e-06,
"loss": -0.0811,
"num_tokens": 1521667384.0,
"reward": 0.9691243171691895,
"reward_std": 0.16067952960729598,
"rewards/accuracy_reward": 0.6624131798744202,
"rewards/brier_reward": 0.8069814324378968,
"rewards/confidence_uniqueness_reward": 0.9217924475669861,
"rewards/format_reward": 0.9710069298744202,
"rewards/frontier_coverage_0": 0.039838623628020285,
"rewards/frontier_coverage_1": 0.03982721939682961,
"rewards/frontier_coverage_10": 0.04834746643900871,
"rewards/frontier_coverage_15": 0.09708862453699112,
"rewards/frontier_coverage_20": 0.16204800009727477,
"rewards/frontier_coverage_25": 0.23406701982021333,
"rewards/frontier_coverage_5": 0.040019629150629045,
"rewards/frontier_entropy_batch_reward": -0.2991880297660828,
"signal/accuracy_reward/centered_abs_mean": 0.15218641459941865,
"signal/accuracy_reward/group_std_mean": 0.20358789563179017,
"signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9086845397949219,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07609320729970932,
"signal/advantage_abs_mean": 0.7256081700325012,
"signal/advantage_pre_scale_abs_mean": 0.11406800299882888,
"signal/advantage_pre_scale_std": 0.1945664405822754,
"signal/advantage_std": 0.9832754135131836,
"signal/brier_reward/centered_abs_mean": 0.12671963274478912,
"signal/brier_reward/group_std_mean": 0.17179930806159974,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1513580173254013,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012671963125467301,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05516675487160683,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09555203318595887,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06452755033969879,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005516675394028425,
"signal/format_reward/centered_abs_mean": 0.04739583320915699,
"signal/format_reward/group_std_mean": 0.08717522174119949,
"signal/format_reward/group_zero_std_frac": 0.65,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2736491531133652,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.023697916604578494,
"signal/frontier_coverage_0/centered_abs_mean": 0.15436613261699678,
"signal/frontier_coverage_0/group_std_mean": 0.20032262206077575,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02646334134042263,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022074357373639943,
"signal/frontier_coverage_1/centered_abs_mean": 0.15427806973457336,
"signal/frontier_coverage_1/group_std_mean": 0.2002100557088852,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02644813396036625,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022061764262616634,
"signal/frontier_coverage_10/centered_abs_mean": 0.05655251294374466,
"signal/frontier_coverage_10/group_std_mean": 0.07212142795324325,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0097461000084877,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008087009191513062,
"signal/frontier_coverage_15/centered_abs_mean": 0.07178578078746796,
"signal/frontier_coverage_15/group_std_mean": 0.09074690490961075,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012421418353915215,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010265366523526608,
"signal/frontier_coverage_20/centered_abs_mean": 0.1017922267317772,
"signal/frontier_coverage_20/group_std_mean": 0.12880659401416777,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017606715485453606,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001455628895200789,
"signal/frontier_coverage_25/centered_abs_mean": 0.13876647651195526,
"signal/frontier_coverage_25/group_std_mean": 0.175778591632843,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.023976121470332144,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001984360720962286,
"signal/frontier_coverage_5/centered_abs_mean": 0.15271045863628388,
"signal/frontier_coverage_5/group_std_mean": 0.19827630817890168,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02617722600698471,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021837596548721196,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33504435420036316,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4021625995635986,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4056344449520111,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033504435792565346,
"step": 585
},
{
"calibration/aurc": 0.13363501840101474,
"calibration/batch_distribution_entropy": 0.927256228618109,
"calibration/buffer_distribution_entropy": 0.9812668042647432,
"calibration/confidence_entropy": 0.488665506297988,
"calibration/coverage@0%": 0.22362701483296807,
"calibration/coverage@1%": 0.2689885986026016,
"calibration/coverage@10%": 0.43250913802016655,
"calibration/coverage@15%": 0.6891102145292469,
"calibration/coverage@20%": 0.7362514182042972,
"calibration/coverage@25%": 0.7613508744799712,
"calibration/coverage@30%": 0.9244356505657093,
"calibration/coverage@5%": 0.3492814081365009,
"calibration/ece": 0.15907323105091412,
"calibration/mean_confidence": 0.6248761445497155,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004600694444444442,
"completions/max_length": 3681.4,
"completions/max_terminated_length": 3681.4,
"completions/mean_length": 1218.0521728515625,
"completions/mean_terminated_length": 1223.7986083984374,
"completions/min_length": 0.0,
"completions/min_terminated_length": 205.4,
"epoch": 1.417594780065249,
"grad_norm": 0.0023973125498741865,
"learning_rate": 2.7043269230769233e-06,
"loss": -0.0166,
"num_tokens": 1538822801.0,
"reward": 0.9975666046142578,
"reward_std": 0.10440057963132858,
"rewards/accuracy_reward": 0.68828125,
"rewards/brier_reward": 0.8343516826629639,
"rewards/confidence_uniqueness_reward": 0.9442711710929871,
"rewards/format_reward": 0.9953992962837219,
"rewards/frontier_coverage_0": 0.04567217640578747,
"rewards/frontier_coverage_1": 0.04569807052612305,
"rewards/frontier_coverage_10": 0.05116933360695839,
"rewards/frontier_coverage_15": 0.10133900344371796,
"rewards/frontier_coverage_20": 0.1687961131334305,
"rewards/frontier_coverage_25": 0.2452457994222641,
"rewards/frontier_coverage_5": 0.04587310701608658,
"rewards/frontier_entropy_batch_reward": -0.32200189828872683,
"signal/accuracy_reward/centered_abs_mean": 0.11290690153837205,
"signal/accuracy_reward/group_std_mean": 0.15099144130945205,
"signal/accuracy_reward/group_zero_std_frac": 0.569444453716278,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.925674319267273,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05645345076918602,
"signal/advantage_abs_mean": 0.7637529969215393,
"signal/advantage_pre_scale_abs_mean": 0.07765910625457764,
"signal/advantage_pre_scale_std": 0.12909533083438873,
"signal/advantage_std": 0.9828407287597656,
"signal/brier_reward/centered_abs_mean": 0.10314983427524567,
"signal/brier_reward/group_std_mean": 0.136023673415184,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1698082685470581,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010314983315765858,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021420668810606003,
"signal/confidence_uniqueness_reward/group_std_mean": 0.034321589022874834,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.035426610708236696,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002142066927626729,
"signal/format_reward/centered_abs_mean": 0.008393011963926255,
"signal/format_reward/group_std_mean": 0.018513968773186208,
"signal/format_reward/group_zero_std_frac": 0.9138888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06900344025343656,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004196505981963128,
"signal/frontier_coverage_0/centered_abs_mean": 0.14370577037334442,
"signal/frontier_coverage_0/group_std_mean": 0.18573465943336487,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0336445227265358,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020549925277009605,
"signal/frontier_coverage_1/centered_abs_mean": 0.14363015294075013,
"signal/frontier_coverage_1/group_std_mean": 0.18563660383224487,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.033626696467399596,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002053911192342639,
"signal/frontier_coverage_10/centered_abs_mean": 0.05578533932566643,
"signal/frontier_coverage_10/group_std_mean": 0.07023323774337768,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01311029139906168,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007977303350344301,
"signal/frontier_coverage_15/centered_abs_mean": 0.07076951265335082,
"signal/frontier_coverage_15/group_std_mean": 0.08869308978319168,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01673793625086546,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001012004038784653,
"signal/frontier_coverage_20/centered_abs_mean": 0.09809644967317581,
"signal/frontier_coverage_20/group_std_mean": 0.12331822216510772,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02323850505053997,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014027792029082775,
"signal/frontier_coverage_25/centered_abs_mean": 0.13125519305467606,
"signal/frontier_coverage_25/group_std_mean": 0.1657370448112488,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03108687661588192,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018769492628052831,
"signal/frontier_coverage_5/centered_abs_mean": 0.14228105694055557,
"signal/frontier_coverage_5/group_std_mean": 0.18392003774642945,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.033308600261807444,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002034619217738509,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33420050144195557,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3994639039039612,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5513591527938843,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03342005014419556,
"step": 590
},
{
"calibration/aurc": 0.07480890651703404,
"calibration/batch_distribution_entropy": 0.9828328527315134,
"calibration/buffer_distribution_entropy": 0.9817377142431003,
"calibration/confidence_entropy": 0.4830405262105046,
"calibration/coverage@0%": 0.23835041511170654,
"calibration/coverage@1%": 0.317058802840166,
"calibration/coverage@10%": 0.7133245133487348,
"calibration/coverage@15%": 0.8062275869296771,
"calibration/coverage@20%": 0.8965332861149463,
"calibration/coverage@25%": 0.9566224215001435,
"calibration/coverage@30%": 0.985378590078329,
"calibration/coverage@5%": 0.5676621045958471,
"calibration/ece": 0.21566845079895333,
"calibration/mean_confidence": 0.5305456805763834,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0018229166666666962,
"completions/max_length": 3201.4,
"completions/max_terminated_length": 3201.4,
"completions/mean_length": 1157.2978271484376,
"completions/mean_terminated_length": 1159.3373779296876,
"completions/min_length": 0.0,
"completions/min_terminated_length": 174.2,
"epoch": 1.429594630067124,
"grad_norm": 0.00260770577006042,
"learning_rate": 2.6742788461538467e-06,
"loss": 0.0084,
"num_tokens": 1555252088.0,
"reward": 1.0056678652763367,
"reward_std": 0.10079189985990525,
"rewards/accuracy_reward": 0.6982638835906982,
"rewards/brier_reward": 0.8137720704078675,
"rewards/confidence_uniqueness_reward": 0.9492265462875367,
"rewards/format_reward": 0.9981770873069763,
"rewards/frontier_coverage_0": 0.026146640256047248,
"rewards/frontier_coverage_1": 0.02614601030945778,
"rewards/frontier_coverage_10": 0.0456491582095623,
"rewards/frontier_coverage_15": 0.0906353935599327,
"rewards/frontier_coverage_20": 0.15246600657701492,
"rewards/frontier_coverage_25": 0.2238086700439453,
"rewards/frontier_coverage_5": 0.026459738612174988,
"rewards/frontier_entropy_batch_reward": -0.27308249771595,
"signal/accuracy_reward/centered_abs_mean": 0.12478298544883729,
"signal/accuracy_reward/group_std_mean": 0.1637239784002304,
"signal/accuracy_reward/group_zero_std_frac": 0.5333333432674408,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.070089840888977,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06239149272441864,
"signal/advantage_abs_mean": 0.7668791890144349,
"signal/advantage_pre_scale_abs_mean": 0.07820883989334107,
"signal/advantage_pre_scale_std": 0.12572188526391984,
"signal/advantage_std": 0.982763123512268,
"signal/brier_reward/centered_abs_mean": 0.10813791900873185,
"signal/brier_reward/group_std_mean": 0.13981907367706298,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1857350766658783,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010813792422413825,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015965032763779162,
"signal/confidence_uniqueness_reward/group_std_mean": 0.022816429287195204,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027523915842175485,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015965032856911422,
"signal/format_reward/centered_abs_mean": 0.003271484305150807,
"signal/format_reward/group_std_mean": 0.007144530490040779,
"signal/format_reward/group_zero_std_frac": 0.9666666507720947,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.028176695853471757,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0016357421525754034,
"signal/frontier_coverage_0/centered_abs_mean": 0.16511012017726898,
"signal/frontier_coverage_0/group_std_mean": 0.21421845853328705,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0404993049800396,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023610747884958982,
"signal/frontier_coverage_1/centered_abs_mean": 0.16507968604564666,
"signal/frontier_coverage_1/group_std_mean": 0.21418104469776153,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0404917910695076,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023606396745890377,
"signal/frontier_coverage_10/centered_abs_mean": 0.058770237118005754,
"signal/frontier_coverage_10/group_std_mean": 0.0750571459531784,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014474144019186497,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008404143969528377,
"signal/frontier_coverage_15/centered_abs_mean": 0.06682768538594246,
"signal/frontier_coverage_15/group_std_mean": 0.08372382670640946,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016561147198081015,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009556358796544373,
"signal/frontier_coverage_20/centered_abs_mean": 0.09060783386230468,
"signal/frontier_coverage_20/group_std_mean": 0.11350671499967575,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022481374442577362,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012956920312717558,
"signal/frontier_coverage_25/centered_abs_mean": 0.12240231782197952,
"signal/frontier_coverage_25/group_std_mean": 0.15335234999656677,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030351197719573973,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017503531882539392,
"signal/frontier_coverage_5/centered_abs_mean": 0.16379218697547912,
"signal/frontier_coverage_5/group_std_mean": 0.2125529944896698,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04017730951309204,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023422284051775933,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3053570449352264,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37301817536354065,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.526164972782135,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030535706505179405,
"step": 595
},
{
"calibration/aurc": 0.11776333779549893,
"calibration/batch_distribution_entropy": 0.9177009565648232,
"calibration/buffer_distribution_entropy": 0.9813989915403323,
"calibration/confidence_entropy": 0.48154410169959566,
"calibration/coverage@0%": 0.15052083333333333,
"calibration/coverage@1%": 0.2572916666666667,
"calibration/coverage@10%": 0.6625,
"calibration/coverage@15%": 0.7395833333333333,
"calibration/coverage@20%": 0.7776041666666667,
"calibration/coverage@25%": 0.8338541666666668,
"calibration/coverage@30%": 0.8661458333333332,
"calibration/coverage@5%": 0.37395833333333334,
"calibration/ece": 0.15964555208333336,
"calibration/mean_confidence": 0.6478661145833333,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0007812500000000222,
"completions/max_length": 3292.2,
"completions/max_terminated_length": 3292.2,
"completions/mean_length": 1060.505810546875,
"completions/mean_terminated_length": 1061.3367553710937,
"completions/min_length": 36.4,
"completions/min_terminated_length": 216.2,
"epoch": 1.441594480068999,
"grad_norm": 0.0025479544419795275,
"learning_rate": 2.6442307692307696e-06,
"loss": 0.0073,
"num_tokens": 1570563899.0,
"reward": 1.020909571647644,
"reward_std": 0.10144262760877609,
"rewards/accuracy_reward": 0.729600703716278,
"rewards/brier_reward": 0.8360188364982605,
"rewards/confidence_uniqueness_reward": 0.9478083848953247,
"rewards/format_reward": 0.99921875,
"rewards/frontier_coverage_0": 0.0182599871623097,
"rewards/frontier_coverage_1": 0.018277949932962657,
"rewards/frontier_coverage_10": 0.05050070583820343,
"rewards/frontier_coverage_15": 0.10800392180681229,
"rewards/frontier_coverage_20": 0.18200758695602418,
"rewards/frontier_coverage_25": 0.2649056166410446,
"rewards/frontier_coverage_5": 0.018470912738121115,
"rewards/frontier_entropy_batch_reward": -0.31326996684074404,
"signal/accuracy_reward/centered_abs_mean": 0.12102321982383728,
"signal/accuracy_reward/group_std_mean": 0.1607717901468277,
"signal/accuracy_reward/group_zero_std_frac": 0.5416666686534881,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.993973171710968,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06051160991191864,
"signal/advantage_abs_mean": 0.7755637049674988,
"signal/advantage_pre_scale_abs_mean": 0.07838105112314224,
"signal/advantage_pre_scale_std": 0.12489996254444122,
"signal/advantage_std": 0.9828433275222779,
"signal/brier_reward/centered_abs_mean": 0.10264720171689987,
"signal/brier_reward/group_std_mean": 0.13262540996074676,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1685381680727005,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01026472058147192,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015869051963090897,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02228650264441967,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026085112243890762,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015869052149355412,
"signal/format_reward/centered_abs_mean": 0.0015136718400754034,
"signal/format_reward/group_std_mean": 0.0044194171205163,
"signal/format_reward/group_zero_std_frac": 0.9749999880790711,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012289304099977017,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359200377017,
"signal/frontier_coverage_0/centered_abs_mean": 0.1449378103017807,
"signal/frontier_coverage_0/group_std_mean": 0.189239040017128,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03399923667311668,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020726106828078627,
"signal/frontier_coverage_1/centered_abs_mean": 0.14487462043762206,
"signal/frontier_coverage_1/group_std_mean": 0.18915866911411286,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.033984321355819705,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020717070437967777,
"signal/frontier_coverage_10/centered_abs_mean": 0.056587740778923035,
"signal/frontier_coverage_10/group_std_mean": 0.07166957706212998,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013312225975096226,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008092047180980444,
"signal/frontier_coverage_15/centered_abs_mean": 0.07792377918958664,
"signal/frontier_coverage_15/group_std_mean": 0.09718545377254487,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01838395856320858,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001114309998229146,
"signal/frontier_coverage_20/centered_abs_mean": 0.10968948751688004,
"signal/frontier_coverage_20/group_std_mean": 0.13686617612838745,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02588742785155773,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015685596736148,
"signal/frontier_coverage_25/centered_abs_mean": 0.14639344513416291,
"signal/frontier_coverage_25/group_std_mean": 0.1832552284002304,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03454747945070267,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002093426208011806,
"signal/frontier_coverage_5/centered_abs_mean": 0.1441301167011261,
"signal/frontier_coverage_5/group_std_mean": 0.18819935619831085,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03380856290459633,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002061060653068125,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3321432411670685,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3983724594116211,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5467443525791168,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0332143247127533,
"step": 600
},
{
"epoch": 1.441594480068999,
"eval_calibration/aurc": 0.11802945623865287,
"eval_calibration/batch_distribution_entropy": 0.9366906879155904,
"eval_calibration/buffer_distribution_entropy": 0.9812793383792132,
"eval_calibration/confidence_entropy": 0.5005515326063407,
"eval_calibration/coverage@0%": 0.2916666666666667,
"eval_calibration/coverage@1%": 0.2916666666666667,
"eval_calibration/coverage@10%": 0.6197916666666666,
"eval_calibration/coverage@15%": 0.7239583333333334,
"eval_calibration/coverage@20%": 0.796875,
"eval_calibration/coverage@25%": 0.8854166666666666,
"eval_calibration/coverage@30%": 0.984375,
"eval_calibration/coverage@5%": 0.3802083333333333,
"eval_calibration/ece": 0.18726786858974356,
"eval_calibration/mean_confidence": 0.5804040064102564,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 2411.8333333333335,
"eval_completions/max_terminated_length": 2411.8333333333335,
"eval_completions/mean_length": 993.3123575846354,
"eval_completions/mean_terminated_length": 993.3123575846354,
"eval_completions/min_length": 241.83333333333334,
"eval_completions/min_terminated_length": 241.83333333333334,
"eval_loss": 0.0,
"eval_num_tokens": 1570563899.0,
"eval_reward": 0.9275561968485514,
"eval_reward_std": 0.23124410212039948,
"eval_rewards/accuracy_reward": 0.6875,
"eval_rewards/brier_reward": 0.8379482428232828,
"eval_rewards/confidence_uniqueness_reward": 0.8963758647441864,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_coverage_0": 0.04966919838140408,
"eval_rewards/frontier_coverage_1": 0.049654243824382625,
"eval_rewards/frontier_coverage_10": 0.055118689934412636,
"eval_rewards/frontier_coverage_15": 0.10419152304530144,
"eval_rewards/frontier_coverage_20": 0.17086196939150491,
"eval_rewards/frontier_coverage_25": 0.24627631157636642,
"eval_rewards/frontier_coverage_5": 0.04966597332774351,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 131.1254,
"eval_samples_per_second": 7.626,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4157986144224803,
"eval_signal/accuracy_reward/group_std_mean": 0.46200739840666455,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9027682542800903,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20789930721124014,
"eval_signal/advantage_abs_mean": 0.8828665316104889,
"eval_signal/advantage_pre_scale_abs_mean": 0.20522412161032358,
"eval_signal/advantage_pre_scale_std": 0.22870965053637823,
"eval_signal/advantage_std": 0.9863951603571574,
"eval_signal/brier_reward/centered_abs_mean": 0.1551913395524025,
"eval_signal/brier_reward/group_std_mean": 0.21014980723460516,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06728844096263249,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.015519133924196163,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.041531032572189965,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04975000210106373,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.018028488382697105,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0041531034124394255,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.2648828824361165,
"eval_signal/frontier_coverage_0/group_std_mean": 0.3670547952254613,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01645986953129371,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037878251556927958,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.26476379483938217,
"eval_signal/frontier_coverage_1/group_std_mean": 0.36690954864025116,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.016452479176223278,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037861222323651114,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.07583417370915413,
"eval_signal/frontier_coverage_10/group_std_mean": 0.09882033616304398,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004711338396494587,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010844287074481447,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.11589070161183675,
"eval_signal/frontier_coverage_15/group_std_mean": 0.1468774676322937,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0072002453574289875,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016572370271508892,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.197922649482886,
"eval_signal/frontier_coverage_20/group_std_mean": 0.24237419913212457,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01229737838730216,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028302938444539905,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.28636286159356433,
"eval_signal/frontier_coverage_25/group_std_mean": 0.34704581399758655,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017790169765551884,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00409498888378342,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2635475993156433,
"eval_signal/frontier_coverage_5/group_std_mean": 0.36541228493054706,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.016376903591056664,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037687306369965277,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.046,
"step": 600
},
{
"epoch": 1.441594480068999,
"step": 600,
"train_probe_calibration/aurc": 0.11675127139560688,
"train_probe_calibration/batch_distribution_entropy": 0.9230508777439455,
"train_probe_calibration/buffer_distribution_entropy": 0.9812899727047678,
"train_probe_calibration/confidence_entropy": 0.48371354175712417,
"train_probe_calibration/coverage@0%": 0.34375,
"train_probe_calibration/coverage@1%": 0.34375,
"train_probe_calibration/coverage@10%": 0.515625,
"train_probe_calibration/coverage@15%": 0.7083333333333334,
"train_probe_calibration/coverage@20%": 0.8385416666666666,
"train_probe_calibration/coverage@25%": 0.9739583333333334,
"train_probe_calibration/coverage@30%": 0.9947916666666666,
"train_probe_calibration/coverage@5%": 0.3489583333333333,
"train_probe_calibration/ece": 0.21593437499999998,
"train_probe_calibration/mean_confidence": 0.5957906249999999,
"train_probe_completions/clipped_ratio": 0.0008680555555555617,
"train_probe_completions/max_length": 2463.8333333333335,
"train_probe_completions/max_terminated_length": 2463.8333333333335,
"train_probe_completions/mean_length": 957.962880452474,
"train_probe_completions/mean_terminated_length": 958.8002115885416,
"train_probe_completions/min_length": 142.83333333333334,
"train_probe_completions/min_terminated_length": 171.83333333333334,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 1570563899.0,
"train_probe_reward": 0.9545861085255941,
"train_probe_reward_std": 0.21626246720552444,
"train_probe_rewards/accuracy_reward": 0.7447916766007742,
"train_probe_rewards/brier_reward": 0.8374614318211874,
"train_probe_rewards/confidence_uniqueness_reward": 0.89298415184021,
"train_probe_rewards/format_reward": 0.9991319477558136,
"train_probe_rewards/frontier_coverage_0": 0.010258166119456291,
"train_probe_rewards/frontier_coverage_1": 0.010285623526821533,
"train_probe_rewards/frontier_coverage_10": 0.05134387003878752,
"train_probe_rewards/frontier_coverage_15": 0.11345388740301132,
"train_probe_rewards/frontier_coverage_20": 0.19093378633260727,
"train_probe_rewards/frontier_coverage_25": 0.2771032725771268,
"train_probe_rewards/frontier_coverage_5": 0.010462871704172963,
"train_probe_rewards/frontier_entropy_batch_reward": -0.9991319477558136,
"train_probe_runtime": 143.1787,
"train_probe_samples_per_second": 6.984,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.3704427083333333,
"train_probe_signal/accuracy_reward/group_std_mean": 0.43535151580969494,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8639881908893585,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18522135416666666,
"train_probe_signal/advantage_abs_mean": 0.8328354756037394,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.18153581271568933,
"train_probe_signal/advantage_pre_scale_std": 0.21439906706412634,
"train_probe_signal/advantage_std": 0.9863629341125488,
"train_probe_signal/brier_reward/centered_abs_mean": 0.15394766877094904,
"train_probe_signal/brier_reward/group_std_mean": 0.20854342232147852,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07198699191212654,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.015394768056770166,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04392562434077263,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.054374140997727714,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020567491340140503,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004392562434077263,
"train_probe_signal/format_reward/centered_abs_mean": 0.0016818575871487458,
"train_probe_signal/format_reward/group_std_mean": 0.0049104637776811915,
"train_probe_signal/format_reward/group_zero_std_frac": 0.9722222288449606,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0037552444264292717,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0008409287935743729,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.25623046855131787,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.36612696945667267,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.017111041583120823,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036640956920261183,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.256125142176946,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.36598806579907733,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.017104018479585648,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036625893941769996,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.07513122757275899,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.09937256947159767,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005020403225595753,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010743765354466934,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.11616303771734238,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.14261490354935327,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007758967267970244,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016611314301068585,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.1925310716032982,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.23078140864769617,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012859225738793612,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002753194266309341,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.27361434201399487,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.32750125726064044,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01827398408204317,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003912684895719091,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.25470831741889316,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.3641922523578008,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.017009020938227575,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003642329053642849,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0016818575871487458,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.0049104637776811915,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9722222288449606,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0007510488697638115,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0001681857587148746,
"train_probe_steps_per_second": 0.042
},
{
"calibration/aurc": 0.09558923489699658,
"calibration/batch_distribution_entropy": 0.9584420972144472,
"calibration/buffer_distribution_entropy": 0.9811046622030893,
"calibration/confidence_entropy": 0.48270719378053056,
"calibration/coverage@0%": 0.0750751573603887,
"calibration/coverage@1%": 0.1750751573603887,
"calibration/coverage@10%": 0.6530113270354913,
"calibration/coverage@15%": 0.7547184024279928,
"calibration/coverage@20%": 0.8532883031633121,
"calibration/coverage@25%": 0.9320883381132516,
"calibration/coverage@30%": 0.9801646673245479,
"calibration/coverage@5%": 0.43802802890908216,
"calibration/ece": 0.14332990975185758,
"calibration/mean_confidence": 0.6008687151115122,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0018229166666666962,
"completions/max_length": 3653.0,
"completions/max_terminated_length": 3653.0,
"completions/mean_length": 943.5162231445313,
"completions/mean_terminated_length": 945.2334350585937,
"completions/min_length": 0.0,
"completions/min_terminated_length": 120.2,
"epoch": 1.453594330070874,
"grad_norm": 0.002810286357998848,
"learning_rate": 2.6141826923076926e-06,
"loss": 0.0089,
"num_tokens": 1584531990.0,
"reward": 1.0092534184455872,
"reward_std": 0.10346733331680298,
"rewards/accuracy_reward": 0.7048611044883728,
"rewards/brier_reward": 0.8292035460472107,
"rewards/confidence_uniqueness_reward": 0.9482634544372559,
"rewards/format_reward": 0.9981770753860474,
"rewards/frontier_coverage_0": 0.02842591591179371,
"rewards/frontier_coverage_1": 0.028446278348565102,
"rewards/frontier_coverage_10": 0.04861754775047302,
"rewards/frontier_coverage_15": 0.09893043637275696,
"rewards/frontier_coverage_20": 0.16540935039520263,
"rewards/frontier_coverage_25": 0.2422512799501419,
"rewards/frontier_coverage_5": 0.028660116344690324,
"rewards/frontier_entropy_batch_reward": -0.2917499512434006,
"signal/accuracy_reward/centered_abs_mean": 0.12080078125,
"signal/accuracy_reward/group_std_mean": 0.1602880299091339,
"signal/accuracy_reward/group_zero_std_frac": 0.5361111223697662,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9658271431922912,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.060400390625,
"signal/advantage_abs_mean": 0.7683446645736695,
"signal/advantage_pre_scale_abs_mean": 0.07905568778514863,
"signal/advantage_pre_scale_std": 0.12629517465829848,
"signal/advantage_std": 0.9828693866729736,
"signal/brier_reward/centered_abs_mean": 0.1080152839422226,
"signal/brier_reward/group_std_mean": 0.14062872529029846,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17367709279060364,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010801529139280319,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.016668078303337098,
"signal/confidence_uniqueness_reward/group_std_mean": 0.024647758901119234,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02698330543935299,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016668078489601613,
"signal/format_reward/centered_abs_mean": 0.003390841977670789,
"signal/format_reward/group_std_mean": 0.00823095440864563,
"signal/format_reward/group_zero_std_frac": 0.9583333253860473,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.026670993864536287,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0016954209888353944,
"signal/frontier_coverage_0/centered_abs_mean": 0.15668127536773682,
"signal/frontier_coverage_0/group_std_mean": 0.20213212072849274,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035876476764678956,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022405422292649745,
"signal/frontier_coverage_1/centered_abs_mean": 0.15663088858127594,
"signal/frontier_coverage_1/group_std_mean": 0.202065372467041,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03586488664150238,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022398216184228658,
"signal/frontier_coverage_10/centered_abs_mean": 0.05823779553174972,
"signal/frontier_coverage_10/group_std_mean": 0.07387124150991439,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013428068906068801,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008328004740178585,
"signal/frontier_coverage_15/centered_abs_mean": 0.07526019364595413,
"signal/frontier_coverage_15/group_std_mean": 0.09428713768720627,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017477550357580186,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010762207210063934,
"signal/frontier_coverage_20/centered_abs_mean": 0.1047041043639183,
"signal/frontier_coverage_20/group_std_mean": 0.13193972706794738,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024335138872265814,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014972686767578125,
"signal/frontier_coverage_25/centered_abs_mean": 0.14114450812339782,
"signal/frontier_coverage_25/group_std_mean": 0.17838573157787324,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.032783514633774755,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020183662883937357,
"signal/frontier_coverage_5/centered_abs_mean": 0.15568934231996537,
"signal/frontier_coverage_5/group_std_mean": 0.20089252889156342,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03564789295196533,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022263576043769716,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3193602502346039,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3889504611492157,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5186540305614471,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03193602599203586,
"step": 605
},
{
"calibration/aurc": 0.16828026211478242,
"calibration/batch_distribution_entropy": 0.9673414895621573,
"calibration/buffer_distribution_entropy": 0.9803350494367313,
"calibration/confidence_entropy": 0.4826084238676106,
"calibration/coverage@0%": 0.09375000000000001,
"calibration/coverage@1%": 0.10833333333333332,
"calibration/coverage@10%": 0.4005208333333333,
"calibration/coverage@15%": 0.5015625,
"calibration/coverage@20%": 0.5817708333333333,
"calibration/coverage@25%": 0.7057291666666666,
"calibration/coverage@30%": 0.8677083333333332,
"calibration/coverage@5%": 0.31614583333333335,
"calibration/ece": 0.17711151736111114,
"calibration/mean_confidence": 0.5755484201388888,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005208333333333482,
"completions/max_length": 2950.2,
"completions/max_terminated_length": 2950.2,
"completions/mean_length": 807.0048583984375,
"completions/mean_terminated_length": 807.4189697265625,
"completions/min_length": 15.4,
"completions/min_terminated_length": 88.6,
"epoch": 1.465594180072749,
"grad_norm": 0.0028614369221031666,
"learning_rate": 2.584134615384616e-06,
"loss": 0.0052,
"num_tokens": 1596957678.0,
"reward": 1.0093258619308472,
"reward_std": 0.10231070816516877,
"rewards/accuracy_reward": 0.7103298664093017,
"rewards/brier_reward": 0.8057976961135864,
"rewards/confidence_uniqueness_reward": 0.9493979334831237,
"rewards/format_reward": 0.9985243082046509,
"rewards/frontier_coverage_0": 0.003922509960830212,
"rewards/frontier_coverage_1": 0.003951227106153965,
"rewards/frontier_coverage_10": 0.04210040867328644,
"rewards/frontier_coverage_15": 0.09112356156110764,
"rewards/frontier_coverage_20": 0.1539991855621338,
"rewards/frontier_coverage_25": 0.22835943698883057,
"rewards/frontier_coverage_5": 0.004323094466235489,
"rewards/frontier_entropy_batch_reward": -0.28168027102947235,
"signal/accuracy_reward/centered_abs_mean": 0.12170681655406952,
"signal/accuracy_reward/group_std_mean": 0.16457101106643676,
"signal/accuracy_reward/group_zero_std_frac": 0.5194444537162781,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9728019595146179,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06085340827703476,
"signal/advantage_abs_mean": 0.7609669446945191,
"signal/advantage_pre_scale_abs_mean": 0.07733545005321503,
"signal/advantage_pre_scale_std": 0.12450267225503922,
"signal/advantage_std": 0.9828919887542724,
"signal/brier_reward/centered_abs_mean": 0.11052588373422623,
"signal/brier_reward/group_std_mean": 0.14374896585941316,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17655435502529143,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011052588745951653,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015836169011890888,
"signal/confidence_uniqueness_reward/group_std_mean": 0.022552402690052986,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02531985826790333,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015836169477552176,
"signal/format_reward/centered_abs_mean": 0.002718098950572312,
"signal/format_reward/group_std_mean": 0.006266768835484981,
"signal/format_reward/group_zero_std_frac": 0.9694444298744201,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.02169901877641678,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001359049475286156,
"signal/frontier_coverage_0/centered_abs_mean": 0.15925846099853516,
"signal/frontier_coverage_0/group_std_mean": 0.2087152421474457,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0364031545817852,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022773958742618563,
"signal/frontier_coverage_1/centered_abs_mean": 0.15921551287174224,
"signal/frontier_coverage_1/group_std_mean": 0.2086589068174362,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0363933652639389,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002276781853288412,
"signal/frontier_coverage_10/centered_abs_mean": 0.0601221852004528,
"signal/frontier_coverage_10/group_std_mean": 0.0764574259519577,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013739870116114616,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008597472333349288,
"signal/frontier_coverage_15/centered_abs_mean": 0.07345416396856308,
"signal/frontier_coverage_15/group_std_mean": 0.09185772836208343,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016773372143507003,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010503945406526328,
"signal/frontier_coverage_20/centered_abs_mean": 0.10081402510404587,
"signal/frontier_coverage_20/group_std_mean": 0.1262542188167572,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023017995804548264,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014416405465453863,
"signal/frontier_coverage_25/centered_abs_mean": 0.13552749156951904,
"signal/frontier_coverage_25/group_std_mean": 0.17060936391353607,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03094673380255699,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019380431389436127,
"signal/frontier_coverage_5/centered_abs_mean": 0.15849049389362335,
"signal/frontier_coverage_5/group_std_mean": 0.20773231983184814,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03622789680957794,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022664140444248913,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31973678469657896,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38735169768333433,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5107354879379272,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03197368010878563,
"step": 610
},
{
"calibration/aurc": 0.17277663233935256,
"calibration/batch_distribution_entropy": 0.9765311373401812,
"calibration/buffer_distribution_entropy": 0.9808250588163799,
"calibration/confidence_entropy": 0.48948248943083283,
"calibration/coverage@0%": 0.005729166666666666,
"calibration/coverage@1%": 0.005729166666666666,
"calibration/coverage@10%": 0.2145833333333333,
"calibration/coverage@15%": 0.5979166666666667,
"calibration/coverage@20%": 0.7630208333333333,
"calibration/coverage@25%": 0.8369791666666668,
"calibration/coverage@30%": 0.9041666666666666,
"calibration/coverage@5%": 0.005729166666666666,
"calibration/ece": 0.1787409583333333,
"calibration/mean_confidence": 0.5740234166666667,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00017361111111111605,
"completions/max_length": 2826.0,
"completions/max_terminated_length": 2826.0,
"completions/mean_length": 790.60712890625,
"completions/mean_terminated_length": 790.7543823242188,
"completions/min_length": 98.6,
"completions/min_terminated_length": 133.6,
"epoch": 1.477594030074624,
"grad_norm": 0.003204671898856759,
"learning_rate": 2.554086538461539e-06,
"loss": 0.0008,
"num_tokens": 1609145056.0,
"reward": 1.0107253670692444,
"reward_std": 0.10107671320438386,
"rewards/accuracy_reward": 0.7134548544883728,
"rewards/brier_reward": 0.8200236558914185,
"rewards/confidence_uniqueness_reward": 0.9490665197372437,
"rewards/format_reward": 0.9998263835906982,
"rewards/frontier_coverage_0": 0.016171068139374255,
"rewards/frontier_coverage_1": 0.016171068139374255,
"rewards/frontier_coverage_10": 0.044859865307807924,
"rewards/frontier_coverage_15": 0.09566431641578674,
"rewards/frontier_coverage_20": 0.16115307211875915,
"rewards/frontier_coverage_25": 0.23886812329292298,
"rewards/frontier_coverage_5": 0.01635436974465847,
"rewards/frontier_entropy_batch_reward": -0.3125044822692871,
"signal/accuracy_reward/centered_abs_mean": 0.12219509482383728,
"signal/accuracy_reward/group_std_mean": 0.16446252465248107,
"signal/accuracy_reward/group_zero_std_frac": 0.5138889074325561,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.986894679069519,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06109754741191864,
"signal/advantage_abs_mean": 0.769283926486969,
"signal/advantage_pre_scale_abs_mean": 0.07805669158697129,
"signal/advantage_pre_scale_std": 0.1223902866244316,
"signal/advantage_std": 0.9828672289848328,
"signal/brier_reward/centered_abs_mean": 0.11475347429513931,
"signal/brier_reward/group_std_mean": 0.14670601189136506,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1857527107000351,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011475348100066184,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014028819277882576,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017688148841261863,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02275208830833435,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014028819277882576,
"signal/format_reward/centered_abs_mean": 0.0003255208255723119,
"signal/format_reward/group_std_mean": 0.0006831518840044737,
"signal/format_reward/group_zero_std_frac": 0.9972222208976745,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0025148998945951464,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00016276041278615594,
"signal/frontier_coverage_0/centered_abs_mean": 0.15996686220169068,
"signal/frontier_coverage_0/group_std_mean": 0.2047890156507492,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03700179383158684,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022875262191519143,
"signal/frontier_coverage_1/centered_abs_mean": 0.15996686220169068,
"signal/frontier_coverage_1/group_std_mean": 0.2047890156507492,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03700179383158684,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022875262191519143,
"signal/frontier_coverage_10/centered_abs_mean": 0.061971521377563475,
"signal/frontier_coverage_10/group_std_mean": 0.07828292399644851,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01436650361865759,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008861927315592765,
"signal/frontier_coverage_15/centered_abs_mean": 0.07821848094463349,
"signal/frontier_coverage_15/group_std_mean": 0.09751923680305481,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01814446821808815,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011185242561623453,
"signal/frontier_coverage_20/centered_abs_mean": 0.10835776478052139,
"signal/frontier_coverage_20/group_std_mean": 0.13570416867733,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02512829452753067,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015495160361751914,
"signal/frontier_coverage_25/centered_abs_mean": 0.14593692719936371,
"signal/frontier_coverage_25/group_std_mean": 0.18364288806915283,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03382998965680599,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002086897986009717,
"signal/frontier_coverage_5/centered_abs_mean": 0.1591697096824646,
"signal/frontier_coverage_5/group_std_mean": 0.20380557775497438,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.036816838383674624,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002276126807555556,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32349973320961,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3918896377086639,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5253556430339813,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0323499746620655,
"step": 615
},
{
"calibration/aurc": 0.14598407216296222,
"calibration/batch_distribution_entropy": 0.982388407039738,
"calibration/buffer_distribution_entropy": 0.9813693991185566,
"calibration/confidence_entropy": 0.489513506980086,
"calibration/coverage@0%": 0.13854166666666667,
"calibration/coverage@1%": 0.21770833333333334,
"calibration/coverage@10%": 0.4604166666666667,
"calibration/coverage@15%": 0.5911458333333334,
"calibration/coverage@20%": 0.6604166666666667,
"calibration/coverage@25%": 0.7458333333333333,
"calibration/coverage@30%": 0.8333333333333334,
"calibration/coverage@5%": 0.3848958333333333,
"calibration/ece": 0.22271012485923425,
"calibration/mean_confidence": 0.5231795626407657,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0004340277777777901,
"completions/max_length": 2951.8,
"completions/max_terminated_length": 2951.8,
"completions/mean_length": 739.6912475585938,
"completions/mean_terminated_length": 740.016650390625,
"completions/min_length": 78.2,
"completions/min_terminated_length": 144.4,
"epoch": 1.489593880076499,
"grad_norm": 0.0033593103289604187,
"learning_rate": 2.5240384615384618e-06,
"loss": 0.0135,
"num_tokens": 1620771995.0,
"reward": 1.0207280397415162,
"reward_std": 0.1027398332953453,
"rewards/accuracy_reward": 0.7306423664093018,
"rewards/brier_reward": 0.8059651494026184,
"rewards/confidence_uniqueness_reward": 0.950669014453888,
"rewards/format_reward": 0.9995659708976745,
"rewards/frontier_coverage_0": -0.008429169561713934,
"rewards/frontier_coverage_1": -0.008429169561713934,
"rewards/frontier_coverage_10": 0.03902908526360989,
"rewards/frontier_coverage_15": 0.09024645537137985,
"rewards/frontier_coverage_20": 0.1556310087442398,
"rewards/frontier_coverage_25": 0.23286145329475402,
"rewards/frontier_coverage_5": -0.008092107716947794,
"rewards/frontier_entropy_batch_reward": -0.2708683729171753,
"signal/accuracy_reward/centered_abs_mean": 0.12894422858953475,
"signal/accuracy_reward/group_std_mean": 0.17072508335113526,
"signal/accuracy_reward/group_zero_std_frac": 0.5111111104488373,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0046961665153504,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06447211429476737,
"signal/advantage_abs_mean": 0.769278085231781,
"signal/advantage_pre_scale_abs_mean": 0.07922997027635574,
"signal/advantage_pre_scale_std": 0.12357212156057358,
"signal/advantage_std": 0.9829211473464966,
"signal/brier_reward/centered_abs_mean": 0.11856135725975037,
"signal/brier_reward/group_std_mean": 0.1511201113462448,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18537597954273224,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011856135725975037,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014381918683648109,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018994222208857537,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02267582081258297,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014381919521838427,
"signal/format_reward/centered_abs_mean": 0.0008300781133584678,
"signal/format_reward/group_std_mean": 0.0021562909707427023,
"signal/format_reward/group_zero_std_frac": 0.9888888835906983,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006034436263144016,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004150390566792339,
"signal/frontier_coverage_0/centered_abs_mean": 0.17420557141304016,
"signal/frontier_coverage_0/group_std_mean": 0.22342933118343353,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03900505751371384,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002491139620542526,
"signal/frontier_coverage_1/centered_abs_mean": 0.17420557141304016,
"signal/frontier_coverage_1/group_std_mean": 0.22342933118343353,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03900505751371384,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002491139620542526,
"signal/frontier_coverage_10/centered_abs_mean": 0.06426115781068802,
"signal/frontier_coverage_10/group_std_mean": 0.08098717033863068,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014438208192586899,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009189345757476986,
"signal/frontier_coverage_15/centered_abs_mean": 0.07448446601629258,
"signal/frontier_coverage_15/group_std_mean": 0.09238378256559372,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016728433780372143,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010651277843862772,
"signal/frontier_coverage_20/centered_abs_mean": 0.10093164592981338,
"signal/frontier_coverage_20/group_std_mean": 0.12564358860254288,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02264065630733967,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014433225616812706,
"signal/frontier_coverage_25/centered_abs_mean": 0.13600083589553832,
"signal/frontier_coverage_25/group_std_mean": 0.17014565765857698,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0304828904569149,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019448119215667247,
"signal/frontier_coverage_5/centered_abs_mean": 0.17328290045261383,
"signal/frontier_coverage_5/group_std_mean": 0.22228720486164094,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.038798777014017106,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002477945387363434,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3208712935447693,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3883806228637695,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5037640929222107,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03208713196218014,
"step": 620
},
{
"calibration/aurc": 0.13299089792623325,
"calibration/batch_distribution_entropy": 0.9681063933987465,
"calibration/buffer_distribution_entropy": 0.9824702871929866,
"calibration/confidence_entropy": 0.49371973292672955,
"calibration/coverage@0%": 0.06255439512619669,
"calibration/coverage@1%": 0.06255439512619669,
"calibration/coverage@10%": 0.45554694299390774,
"calibration/coverage@15%": 0.6134655134899913,
"calibration/coverage@20%": 0.7713337684943429,
"calibration/coverage@25%": 0.8723958333333333,
"calibration/coverage@30%": 0.9348958333333333,
"calibration/coverage@5%": 0.40026381636205394,
"calibration/ece": 0.1668714719484334,
"calibration/mean_confidence": 0.5829425275076152,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000868055555555558,
"completions/max_length": 2621.2,
"completions/max_terminated_length": 2621.2,
"completions/mean_length": 679.5066040039062,
"completions/mean_terminated_length": 680.1067504882812,
"completions/min_length": 29.8,
"completions/min_terminated_length": 123.8,
"epoch": 1.501593730078374,
"grad_norm": 0.003809634130448103,
"learning_rate": 2.4939903846153847e-06,
"loss": 0.006,
"num_tokens": 1631696295.0,
"reward": 1.0135034561157226,
"reward_std": 0.1094308227300644,
"rewards/accuracy_reward": 0.7105902791023254,
"rewards/brier_reward": 0.8304842948913574,
"rewards/confidence_uniqueness_reward": 0.9500762939453125,
"rewards/format_reward": 0.9991319298744201,
"rewards/frontier_coverage_0": 0.025595280434936286,
"rewards/frontier_coverage_1": 0.025595280434936286,
"rewards/frontier_coverage_10": 0.046889835596084596,
"rewards/frontier_coverage_15": 0.0962829276919365,
"rewards/frontier_coverage_20": 0.1629137009382248,
"rewards/frontier_coverage_25": 0.24148197174072267,
"rewards/frontier_coverage_5": 0.02585846995934844,
"rewards/frontier_entropy_batch_reward": -0.2834572374820709,
"signal/accuracy_reward/centered_abs_mean": 0.13683811128139495,
"signal/accuracy_reward/group_std_mean": 0.18167079985141754,
"signal/accuracy_reward/group_zero_std_frac": 0.4777777910232544,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.055351686477661,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06841905564069747,
"signal/advantage_abs_mean": 0.7610543847084046,
"signal/advantage_pre_scale_abs_mean": 0.08355010896921158,
"signal/advantage_pre_scale_std": 0.13148944824934006,
"signal/advantage_std": 0.9829296469688416,
"signal/brier_reward/centered_abs_mean": 0.10652481764554977,
"signal/brier_reward/group_std_mean": 0.13742452561855317,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16548333764076234,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010652481578290462,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015012368932366372,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0208422277122736,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0234722301363945,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015012368094176054,
"signal/format_reward/centered_abs_mean": 0.001649305538740009,
"signal/format_reward/group_std_mean": 0.004259948246181011,
"signal/format_reward/group_zero_std_frac": 0.9777777671813965,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013225546292960643,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008246527693700045,
"signal/frontier_coverage_0/centered_abs_mean": 0.15693804621696472,
"signal/frontier_coverage_0/group_std_mean": 0.20245675444602967,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03491860181093216,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002244214015081525,
"signal/frontier_coverage_1/centered_abs_mean": 0.15693804621696472,
"signal/frontier_coverage_1/group_std_mean": 0.20245675444602967,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03491860181093216,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002244214015081525,
"signal/frontier_coverage_10/centered_abs_mean": 0.05752530992031098,
"signal/frontier_coverage_10/group_std_mean": 0.07304619401693344,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012821021303534508,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008226120029576123,
"signal/frontier_coverage_15/centered_abs_mean": 0.07470366805791855,
"signal/frontier_coverage_15/group_std_mean": 0.09323683530092239,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016622103564441205,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010682624764740468,
"signal/frontier_coverage_20/centered_abs_mean": 0.1053359866142273,
"signal/frontier_coverage_20/group_std_mean": 0.13195045590400695,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02341417223215103,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015063045779243112,
"signal/frontier_coverage_25/centered_abs_mean": 0.1433310478925705,
"signal/frontier_coverage_25/group_std_mean": 0.18033002614974974,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03183464221656322,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020496340468525885,
"signal/frontier_coverage_5/centered_abs_mean": 0.15589080452919007,
"signal/frontier_coverage_5/group_std_mean": 0.20115500092506408,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03468661829829216,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00222923846449703,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32396227717399595,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39139692187309266,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5038922011852265,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03239622823894024,
"step": 625
},
{
"calibration/aurc": 0.18900542943636528,
"calibration/batch_distribution_entropy": 0.9588022252491543,
"calibration/buffer_distribution_entropy": 0.9832399180795719,
"calibration/confidence_entropy": 0.49511201168554775,
"calibration/coverage@0%": 0.027083333333333338,
"calibration/coverage@1%": 0.027083333333333338,
"calibration/coverage@10%": 0.20416666666666666,
"calibration/coverage@15%": 0.3796875,
"calibration/coverage@20%": 0.5932291666666667,
"calibration/coverage@25%": 0.7838541666666667,
"calibration/coverage@30%": 0.8895833333333334,
"calibration/coverage@5%": 0.0640625,
"calibration/ece": 0.13272444218749999,
"calibration/mean_confidence": 0.5808325671875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0002604166666666741,
"completions/max_length": 2794.4,
"completions/max_terminated_length": 2794.4,
"completions/mean_length": 645.9603515625,
"completions/mean_terminated_length": 646.1424438476563,
"completions/min_length": 102.4,
"completions/min_terminated_length": 128.4,
"epoch": 1.513593580080249,
"grad_norm": 0.0036048083566129208,
"learning_rate": 2.463942307692308e-06,
"loss": 0.0061,
"num_tokens": 1642238590.0,
"reward": 1.0172206521034242,
"reward_std": 0.09594685435295106,
"rewards/accuracy_reward": 0.7276041626930236,
"rewards/brier_reward": 0.8266344904899597,
"rewards/confidence_uniqueness_reward": 0.9474079608917236,
"rewards/format_reward": 0.9997395873069763,
"rewards/frontier_coverage_0": 0.016538088396191596,
"rewards/frontier_coverage_1": 0.016538088396191596,
"rewards/frontier_coverage_10": 0.045445504039525984,
"rewards/frontier_coverage_15": 0.09864003360271453,
"rewards/frontier_coverage_20": 0.1674270361661911,
"rewards/frontier_coverage_25": 0.24783487915992736,
"rewards/frontier_coverage_5": 0.016732219979166983,
"rewards/frontier_entropy_batch_reward": -0.3256641149520874,
"signal/accuracy_reward/centered_abs_mean": 0.10422092080116271,
"signal/accuracy_reward/group_std_mean": 0.14965912103652954,
"signal/accuracy_reward/group_zero_std_frac": 0.522222226858139,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8708725094795227,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05211046040058136,
"signal/advantage_abs_mean": 0.7522642731666564,
"signal/advantage_pre_scale_abs_mean": 0.07094881534576417,
"signal/advantage_pre_scale_std": 0.11598304659128189,
"signal/advantage_std": 0.982811689376831,
"signal/brier_reward/centered_abs_mean": 0.10578378438949584,
"signal/brier_reward/group_std_mean": 0.1379517912864685,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17751872837543486,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010578378662467003,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.016063277050852774,
"signal/confidence_uniqueness_reward/group_std_mean": 0.020402568578720092,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026994920149445534,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001606327760964632,
"signal/format_reward/centered_abs_mean": 0.000493706576526165,
"signal/format_reward/group_std_mean": 0.0011741982772946358,
"signal/format_reward/group_zero_std_frac": 0.9944444417953491,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004082060605287552,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002468532882630825,
"signal/frontier_coverage_0/centered_abs_mean": 0.1364389628171921,
"signal/frontier_coverage_0/group_std_mean": 0.1777593731880188,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03274801447987556,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019510772079229354,
"signal/frontier_coverage_1/centered_abs_mean": 0.1364389628171921,
"signal/frontier_coverage_1/group_std_mean": 0.1777593731880188,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03274801447987556,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019510772079229354,
"signal/frontier_coverage_10/centered_abs_mean": 0.056586884707212445,
"signal/frontier_coverage_10/group_std_mean": 0.07152153998613357,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01358992587774992,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008091924944892525,
"signal/frontier_coverage_15/centered_abs_mean": 0.07719572931528092,
"signal/frontier_coverage_15/group_std_mean": 0.09610024690628052,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018524457514286042,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011038989294320344,
"signal/frontier_coverage_20/centered_abs_mean": 0.10774560272693634,
"signal/frontier_coverage_20/group_std_mean": 0.135586416721344,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02583777755498886,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015407620463520288,
"signal/frontier_coverage_25/centered_abs_mean": 0.14388639628887176,
"signal/frontier_coverage_25/group_std_mean": 0.1827654093503952,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03448529541492462,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002057575364597142,
"signal/frontier_coverage_5/centered_abs_mean": 0.1355312928557396,
"signal/frontier_coverage_5/group_std_mean": 0.1766209274530411,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03252986185252667,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019380974117666483,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3347103834152222,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3980586588382721,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5616107821464539,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033471040055155755,
"step": 630
},
{
"calibration/aurc": 0.10900926811330347,
"calibration/batch_distribution_entropy": 0.9646768562626127,
"calibration/buffer_distribution_entropy": 0.983300626563356,
"calibration/confidence_entropy": 0.47932893499263984,
"calibration/coverage@0%": 0.028125,
"calibration/coverage@1%": 0.028125,
"calibration/coverage@10%": 0.5963541666666666,
"calibration/coverage@15%": 0.771875,
"calibration/coverage@20%": 0.8604166666666666,
"calibration/coverage@25%": 0.9265625,
"calibration/coverage@30%": 0.965625,
"calibration/coverage@5%": 0.3578125,
"calibration/ece": 0.16915577300284446,
"calibration/mean_confidence": 0.5765589813361778,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005208333333333482,
"completions/max_length": 3458.0,
"completions/max_terminated_length": 3458.0,
"completions/mean_length": 707.9698120117188,
"completions/mean_terminated_length": 708.3488525390625,
"completions/min_length": 29.6,
"completions/min_terminated_length": 142.2,
"epoch": 1.525593430082124,
"grad_norm": 0.0034078743774443865,
"learning_rate": 2.433894230769231e-06,
"loss": 0.0153,
"num_tokens": 1653519362.0,
"reward": 1.0050111770629884,
"reward_std": 0.104340460896492,
"rewards/accuracy_reward": 0.6942708373069764,
"rewards/brier_reward": 0.8122452735900879,
"rewards/confidence_uniqueness_reward": 0.9519274234771729,
"rewards/format_reward": 0.9994791507720947,
"rewards/frontier_coverage_0": 0.02422009650617838,
"rewards/frontier_coverage_1": 0.02422009650617838,
"rewards/frontier_coverage_10": 0.04215832352638245,
"rewards/frontier_coverage_15": 0.08379101604223252,
"rewards/frontier_coverage_20": 0.14118048250675203,
"rewards/frontier_coverage_25": 0.21112163364887238,
"rewards/frontier_coverage_5": 0.02440500818192959,
"rewards/frontier_entropy_batch_reward": -0.26161783635616304,
"signal/accuracy_reward/centered_abs_mean": 0.13336588591337203,
"signal/accuracy_reward/group_std_mean": 0.17580996751785277,
"signal/accuracy_reward/group_zero_std_frac": 0.5027777910232544,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0433040618896485,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06668294295668602,
"signal/advantage_abs_mean": 0.7685012340545654,
"signal/advantage_pre_scale_abs_mean": 0.08071336597204208,
"signal/advantage_pre_scale_std": 0.12523564100265502,
"signal/advantage_std": 0.9829237222671509,
"signal/brier_reward/centered_abs_mean": 0.11492740660905838,
"signal/brier_reward/group_std_mean": 0.14776553511619567,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18006122708320618,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011492740735411644,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013762745633721351,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01849808692932129,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021554048731923105,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013762745773419737,
"signal/format_reward/centered_abs_mean": 0.0009982638759538532,
"signal/format_reward/group_std_mean": 0.002647337270900607,
"signal/format_reward/group_zero_std_frac": 0.9861111044883728,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0077989035286009315,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004991319379769266,
"signal/frontier_coverage_0/centered_abs_mean": 0.1750947952270508,
"signal/frontier_coverage_0/group_std_mean": 0.22593581676483154,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03923875316977501,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025038556195795536,
"signal/frontier_coverage_1/centered_abs_mean": 0.1750947952270508,
"signal/frontier_coverage_1/group_std_mean": 0.22593581676483154,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03923875316977501,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025038556195795536,
"signal/frontier_coverage_10/centered_abs_mean": 0.06284371763467789,
"signal/frontier_coverage_10/group_std_mean": 0.08010470569133758,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014089632220566273,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008986651315353811,
"signal/frontier_coverage_15/centered_abs_mean": 0.06957742124795914,
"signal/frontier_coverage_15/group_std_mean": 0.08731958866119385,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015588978677988053,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009949571453034879,
"signal/frontier_coverage_20/centered_abs_mean": 0.09489159286022186,
"signal/frontier_coverage_20/group_std_mean": 0.119244185090065,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021246416494250298,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013569497969001532,
"signal/frontier_coverage_25/centered_abs_mean": 0.1292428568005562,
"signal/frontier_coverage_25/group_std_mean": 0.1630004495382309,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028931079804897307,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018481727223843337,
"signal/frontier_coverage_5/centered_abs_mean": 0.17432405054569244,
"signal/frontier_coverage_5/group_std_mean": 0.22496304512023926,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03906645104289055,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002492833789438009,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31597875356674193,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38909701704978944,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.49435396790504454,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03159787617623806,
"step": 635
},
{
"calibration/aurc": 0.132739511797332,
"calibration/batch_distribution_entropy": 0.9625923694508393,
"calibration/buffer_distribution_entropy": 0.9831253583300397,
"calibration/confidence_entropy": 0.48128109715281264,
"calibration/coverage@0%": 0.07874510443864229,
"calibration/coverage@1%": 0.12156494778067883,
"calibration/coverage@10%": 0.42284323324630113,
"calibration/coverage@15%": 0.6694476174934726,
"calibration/coverage@20%": 0.7909432114882506,
"calibration/coverage@25%": 0.8618336597040905,
"calibration/coverage@30%": 0.9082435813751089,
"calibration/coverage@5%": 0.31388571583986075,
"calibration/ece": 0.14269792804612705,
"calibration/mean_confidence": 0.5975095459394038,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.001128472222222232,
"completions/max_length": 3077.8,
"completions/max_terminated_length": 3077.8,
"completions/mean_length": 766.5240600585937,
"completions/mean_terminated_length": 767.429052734375,
"completions/min_length": 20.2,
"completions/min_terminated_length": 126.8,
"epoch": 1.5375932800839989,
"grad_norm": 0.003272005822509527,
"learning_rate": 2.403846153846154e-06,
"loss": 0.0027,
"num_tokens": 1665426583.0,
"reward": 1.0096034407615662,
"reward_std": 0.09776319563388824,
"rewards/accuracy_reward": 0.7085069417953491,
"rewards/brier_reward": 0.8340772271156311,
"rewards/confidence_uniqueness_reward": 0.9471764445304871,
"rewards/format_reward": 0.9988715291023255,
"rewards/frontier_coverage_0": 0.0319485223852098,
"rewards/frontier_coverage_1": 0.0319485223852098,
"rewards/frontier_coverage_10": 0.04951897189021111,
"rewards/frontier_coverage_15": 0.10320504754781723,
"rewards/frontier_coverage_20": 0.17310980558395386,
"rewards/frontier_coverage_25": 0.25380962789058686,
"rewards/frontier_coverage_5": 0.032035707216709855,
"rewards/frontier_entropy_batch_reward": -0.3187188684940338,
"signal/accuracy_reward/centered_abs_mean": 0.11482204794883728,
"signal/accuracy_reward/group_std_mean": 0.15133090913295746,
"signal/accuracy_reward/group_zero_std_frac": 0.569444453716278,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0013312339782714,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05741102397441864,
"signal/advantage_abs_mean": 0.7768993496894836,
"signal/advantage_pre_scale_abs_mean": 0.07542656362056732,
"signal/advantage_pre_scale_std": 0.12090198844671249,
"signal/advantage_std": 0.9827402710914612,
"signal/brier_reward/centered_abs_mean": 0.10435597896575928,
"signal/brier_reward/group_std_mean": 0.13554692268371582,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18235518038272858,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010435598157346248,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.016541999764740467,
"signal/confidence_uniqueness_reward/group_std_mean": 0.023310190439224242,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0289510115981102,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016541999299079179,
"signal/format_reward/centered_abs_mean": 0.0021213107858784495,
"signal/format_reward/group_std_mean": 0.005352780409157276,
"signal/format_reward/group_zero_std_frac": 0.9722222208976745,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.018210524041205645,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0010606553929392248,
"signal/frontier_coverage_0/centered_abs_mean": 0.15110780000686647,
"signal/frontier_coverage_0/group_std_mean": 0.19436927139759064,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03769223988056183,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021608415991067886,
"signal/frontier_coverage_1/centered_abs_mean": 0.15110780000686647,
"signal/frontier_coverage_1/group_std_mean": 0.19436927139759064,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03769223988056183,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021608415991067886,
"signal/frontier_coverage_10/centered_abs_mean": 0.060875777155160904,
"signal/frontier_coverage_10/group_std_mean": 0.07657611966133118,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015234320424497128,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008705236483365298,
"signal/frontier_coverage_15/centered_abs_mean": 0.07580193877220154,
"signal/frontier_coverage_15/group_std_mean": 0.09422909021377564,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.019024584069848062,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010839677881449461,
"signal/frontier_coverage_20/centered_abs_mean": 0.10344732105731964,
"signal/frontier_coverage_20/group_std_mean": 0.130024753510952,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02597166895866394,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014792966656386852,
"signal/frontier_coverage_25/centered_abs_mean": 0.1384286493062973,
"signal/frontier_coverage_25/group_std_mean": 0.1750235766172409,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.034739000350236894,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001979529578238726,
"signal/frontier_coverage_5/centered_abs_mean": 0.15033363848924636,
"signal/frontier_coverage_5/group_std_mean": 0.19340720176696777,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03749907538294792,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021497709909453987,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32275074124336245,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3881240785121918,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5654678821563721,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03227507434785366,
"step": 640
},
{
"calibration/aurc": 0.1632617220818313,
"calibration/batch_distribution_entropy": 0.9397674467638419,
"calibration/buffer_distribution_entropy": 0.9831640113121327,
"calibration/confidence_entropy": 0.49047782580285765,
"calibration/coverage@0%": 0.020836053089643168,
"calibration/coverage@1%": 0.020836053089643168,
"calibration/coverage@10%": 0.2645860530896432,
"calibration/coverage@15%": 0.6326425152306354,
"calibration/coverage@20%": 0.7118798955613578,
"calibration/coverage@25%": 0.7827485857267188,
"calibration/coverage@30%": 0.9848645561357703,
"calibration/coverage@5%": 0.10208605308964318,
"calibration/ece": 0.1834517613740209,
"calibration/mean_confidence": 0.6010985707734987,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0006076388888889061,
"completions/max_length": 3129.0,
"completions/max_terminated_length": 3129.0,
"completions/mean_length": 815.459033203125,
"completions/mean_terminated_length": 815.9496215820312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 161.0,
"epoch": 1.5495931300858738,
"grad_norm": 0.0032203816808760166,
"learning_rate": 2.373798076923077e-06,
"loss": 0.0031,
"num_tokens": 1677919711.0,
"reward": 1.0256015539169312,
"reward_std": 0.09568341672420502,
"rewards/accuracy_reward": 0.7515625,
"rewards/brier_reward": 0.8331769466400146,
"rewards/confidence_uniqueness_reward": 0.9456747055053711,
"rewards/format_reward": 0.9993923544883728,
"rewards/frontier_coverage_0": 0.0019667490385472776,
"rewards/frontier_coverage_1": 0.0019667490385472776,
"rewards/frontier_coverage_10": 0.04397192746400833,
"rewards/frontier_coverage_15": 0.10374006181955338,
"rewards/frontier_coverage_20": 0.17997487783432006,
"rewards/frontier_coverage_25": 0.2689637690782547,
"rewards/frontier_coverage_5": 0.002235945500433445,
"rewards/frontier_entropy_batch_reward": -0.36381399631500244,
"signal/accuracy_reward/centered_abs_mean": 0.10557725727558136,
"signal/accuracy_reward/group_std_mean": 0.14509375542402267,
"signal/accuracy_reward/group_zero_std_frac": 0.5611111164093018,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.873564088344574,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05278862863779068,
"signal/advantage_abs_mean": 0.7705256938934326,
"signal/advantage_pre_scale_abs_mean": 0.07288682162761688,
"signal/advantage_pre_scale_std": 0.1167471945285797,
"signal/advantage_std": 0.9828287601470947,
"signal/brier_reward/centered_abs_mean": 0.10486017912626266,
"signal/brier_reward/group_std_mean": 0.13558290153741837,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17376516461372377,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01048601809889078,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015681835077703,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02083849869668484,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026080520078539848,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015681835589930414,
"signal/format_reward/centered_abs_mean": 0.0011447482742369176,
"signal/format_reward/group_std_mean": 0.0027868092060089112,
"signal/format_reward/group_zero_std_frac": 0.9861111044883728,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00944533757865429,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0005723741371184588,
"signal/frontier_coverage_0/centered_abs_mean": 0.13758010864257814,
"signal/frontier_coverage_0/group_std_mean": 0.17860827147960662,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03260061703622341,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019673954462632536,
"signal/frontier_coverage_1/centered_abs_mean": 0.13758010864257814,
"signal/frontier_coverage_1/group_std_mean": 0.17860827147960662,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03260061703622341,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019673954462632536,
"signal/frontier_coverage_10/centered_abs_mean": 0.05716334953904152,
"signal/frontier_coverage_10/group_std_mean": 0.0721943661570549,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013553774170577525,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008174359099939466,
"signal/frontier_coverage_15/centered_abs_mean": 0.08003035187721252,
"signal/frontier_coverage_15/group_std_mean": 0.09915542453527451,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01901390254497528,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011444339528679849,
"signal/frontier_coverage_20/centered_abs_mean": 0.11281442493200303,
"signal/frontier_coverage_20/group_std_mean": 0.14117856323719025,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.026818398758769034,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016132463002577423,
"signal/frontier_coverage_25/centered_abs_mean": 0.15117188692092895,
"signal/frontier_coverage_25/group_std_mean": 0.19031570255756378,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0359354741871357,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002161757950671017,
"signal/frontier_coverage_5/centered_abs_mean": 0.13697426319122313,
"signal/frontier_coverage_5/group_std_mean": 0.17787103354930878,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032456709817051885,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019587320508435368,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3425477683544159,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40490528345108034,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5694766044616699,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034254778921604154,
"step": 645
},
{
"calibration/aurc": 0.19994838141742183,
"calibration/batch_distribution_entropy": 0.9632098414546952,
"calibration/buffer_distribution_entropy": 0.9829315610905208,
"calibration/confidence_entropy": 0.4787305902270136,
"calibration/coverage@0%": 0.053234075043630015,
"calibration/coverage@1%": 0.053234075043630015,
"calibration/coverage@10%": 0.23770451570680629,
"calibration/coverage@15%": 0.33285067626527054,
"calibration/coverage@20%": 0.5511071116928448,
"calibration/coverage@25%": 0.7573271160558463,
"calibration/coverage@30%": 0.8289839659685864,
"calibration/coverage@5%": 0.1424956369982548,
"calibration/ece": 0.18254347376199828,
"calibration/mean_confidence": 0.49279760826243457,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00277777777777779,
"completions/max_length": 3912.0,
"completions/max_terminated_length": 3912.0,
"completions/mean_length": 1008.4895629882812,
"completions/mean_terminated_length": 1011.3613891601562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 169.2,
"epoch": 1.5615929800877488,
"grad_norm": 0.0029296150896698236,
"learning_rate": 2.3437500000000002e-06,
"loss": 0.0108,
"num_tokens": 1692632999.0,
"reward": 1.0104209780693054,
"reward_std": 0.10768526047468185,
"rewards/accuracy_reward": 0.7127604126930237,
"rewards/brier_reward": 0.8133676528930665,
"rewards/confidence_uniqueness_reward": 0.9476563215255738,
"rewards/format_reward": 0.9972222328186036,
"rewards/frontier_coverage_0": 0.0160905129625462,
"rewards/frontier_coverage_1": 0.0160905129625462,
"rewards/frontier_coverage_10": 0.04221442565321922,
"rewards/frontier_coverage_15": 0.08863531947135925,
"rewards/frontier_coverage_20": 0.15275568068027495,
"rewards/frontier_coverage_25": 0.23076119422912597,
"rewards/frontier_coverage_5": 0.016149751842021942,
"rewards/frontier_entropy_batch_reward": -0.2871933221817017,
"signal/accuracy_reward/centered_abs_mean": 0.13562825918197632,
"signal/accuracy_reward/group_std_mean": 0.1758606731891632,
"signal/accuracy_reward/group_zero_std_frac": 0.5083333492279053,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0496046185493468,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06781412959098816,
"signal/advantage_abs_mean": 0.7706871032714844,
"signal/advantage_pre_scale_abs_mean": 0.08306800574064255,
"signal/advantage_pre_scale_std": 0.13049945682287217,
"signal/advantage_std": 0.9829372525215149,
"signal/brier_reward/centered_abs_mean": 0.12161406874656677,
"signal/brier_reward/group_std_mean": 0.1559804707765579,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1888038247823715,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012161407060921193,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.018055187538266182,
"signal/confidence_uniqueness_reward/group_std_mean": 0.026942530646920204,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02799047380685806,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018055187771096825,
"signal/format_reward/centered_abs_mean": 0.005078125023283064,
"signal/format_reward/group_std_mean": 0.010953563638031483,
"signal/format_reward/group_zero_std_frac": 0.95,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.03934686332941055,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.002539062511641532,
"signal/frontier_coverage_0/centered_abs_mean": 0.18165695369243623,
"signal/frontier_coverage_0/group_std_mean": 0.23133923709392548,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04036081805825233,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025976944249123334,
"signal/frontier_coverage_1/centered_abs_mean": 0.18165695369243623,
"signal/frontier_coverage_1/group_std_mean": 0.23133923709392548,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04036081805825233,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025976944249123334,
"signal/frontier_coverage_10/centered_abs_mean": 0.06570319607853889,
"signal/frontier_coverage_10/group_std_mean": 0.08306444734334946,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014596488140523434,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009395557222887874,
"signal/frontier_coverage_15/centered_abs_mean": 0.07579767853021621,
"signal/frontier_coverage_15/group_std_mean": 0.09315522462129593,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016809598729014396,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010839068330824376,
"signal/frontier_coverage_20/centered_abs_mean": 0.10389769673347474,
"signal/frontier_coverage_20/group_std_mean": 0.12809243351221083,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023021703585982323,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014857371104881167,
"signal/frontier_coverage_25/centered_abs_mean": 0.1412452608346939,
"signal/frontier_coverage_25/group_std_mean": 0.1752469062805176,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.031283880770206454,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020198072539642452,
"signal/frontier_coverage_5/centered_abs_mean": 0.1812539279460907,
"signal/frontier_coverage_5/group_std_mean": 0.23083524107933046,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.040271298587322236,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025919311214238406,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3169364869594574,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.384937995672226,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.491671484708786,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031693648919463156,
"step": 650
},
{
"epoch": 1.5615929800877488,
"eval_calibration/aurc": 0.1258854326762919,
"eval_calibration/batch_distribution_entropy": 0.92262165151983,
"eval_calibration/buffer_distribution_entropy": 0.9839670771333515,
"eval_calibration/confidence_entropy": 0.5051808162676946,
"eval_calibration/coverage@0%": 0.22916666666666666,
"eval_calibration/coverage@1%": 0.22916666666666666,
"eval_calibration/coverage@10%": 0.546875,
"eval_calibration/coverage@15%": 0.6302083333333334,
"eval_calibration/coverage@20%": 0.8125,
"eval_calibration/coverage@25%": 0.9479166666666666,
"eval_calibration/coverage@30%": 0.9635416666666666,
"eval_calibration/coverage@5%": 0.421875,
"eval_calibration/ece": 0.28119635416666666,
"eval_calibration/mean_confidence": 0.5181848958333334,
"eval_completions/clipped_ratio": 0.006944444444444457,
"eval_completions/max_length": 3462.6666666666665,
"eval_completions/max_terminated_length": 3462.6666666666665,
"eval_completions/mean_length": 1139.494120279948,
"eval_completions/mean_terminated_length": 1147.5079345703125,
"eval_completions/min_length": 89.66666666666667,
"eval_completions/min_terminated_length": 220.0,
"eval_loss": 0.0,
"eval_num_tokens": 1692632999.0,
"eval_reward": 0.9208190242449442,
"eval_reward_std": 0.23059933632612228,
"eval_rewards/accuracy_reward": 0.6909722288449606,
"eval_rewards/brier_reward": 0.8101545870304108,
"eval_rewards/confidence_uniqueness_reward": 0.8898043135801951,
"eval_rewards/format_reward": 0.9921875099341074,
"eval_rewards/frontier_coverage_0": 0.03612114832503721,
"eval_rewards/frontier_coverage_1": 0.03612114832503721,
"eval_rewards/frontier_coverage_10": 0.04670971849312385,
"eval_rewards/frontier_coverage_15": 0.08305741598208745,
"eval_rewards/frontier_coverage_20": 0.14025516683856645,
"eval_rewards/frontier_coverage_25": 0.21333598345518112,
"eval_rewards/frontier_coverage_5": 0.03614697029115632,
"eval_rewards/frontier_entropy_batch_reward": -0.9921875099341074,
"eval_runtime": 214.7999,
"eval_samples_per_second": 4.655,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4059244791666667,
"eval_signal/accuracy_reward/group_std_mean": 0.4549813171227773,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8949279487133026,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20296223958333334,
"eval_signal/advantage_abs_mean": 0.8530746897061666,
"eval_signal/advantage_pre_scale_abs_mean": 0.1983613446354866,
"eval_signal/advantage_pre_scale_std": 0.22935798267523447,
"eval_signal/advantage_std": 0.9863868057727814,
"eval_signal/brier_reward/centered_abs_mean": 0.17868993182977042,
"eval_signal/brier_reward/group_std_mean": 0.24043517063061395,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07871770237882932,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01786899333819747,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04949278508623441,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07535891359051068,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021756678509215515,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004949278353403012,
"eval_signal/format_reward/centered_abs_mean": 0.015028211753815413,
"eval_signal/format_reward/group_std_mean": 0.04120476512859265,
"eval_signal/format_reward/group_zero_std_frac": 0.7777778009573618,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0320138872290651,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.007514105876907706,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.32204001148541767,
"eval_signal/frontier_coverage_0/group_std_mean": 0.4291209429502487,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02039930286506812,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004605172357211511,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.32204001148541767,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4291209429502487,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02039930286506812,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004605172357211511,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.092753649999698,
"eval_signal/frontier_coverage_10/group_std_mean": 0.12686272462209067,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0058677659059564275,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013263771737304826,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.09765455995996793,
"eval_signal/frontier_coverage_15/group_std_mean": 0.12701285382111868,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00617311514603595,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013964602064030867,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.16553593426942825,
"eval_signal/frontier_coverage_20/group_std_mean": 0.20874186108509699,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.010455410151431957,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002367163930709163,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.24852682650089264,
"eval_signal/frontier_coverage_25/group_std_mean": 0.3072594503561656,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01568988710641861,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035539336192111173,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.32129410405953723,
"eval_signal/frontier_coverage_5/group_std_mean": 0.42822254200776416,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.020351968084772427,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0045945055317133665,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.015028211753815413,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.04120476512859265,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.7777778009573618,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.006402777663121621,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0015028212607527773,
"eval_steps_per_second": 0.028,
"step": 650
},
{
"epoch": 1.5615929800877488,
"step": 650,
"train_probe_calibration/aurc": 0.1050277420453154,
"train_probe_calibration/batch_distribution_entropy": 0.92016610975414,
"train_probe_calibration/buffer_distribution_entropy": 0.9841441810954885,
"train_probe_calibration/confidence_entropy": 0.5080130716251837,
"train_probe_calibration/coverage@0%": 0.3111559139784946,
"train_probe_calibration/coverage@1%": 0.3111559139784946,
"train_probe_calibration/coverage@10%": 0.5475470430107526,
"train_probe_calibration/coverage@15%": 0.7043010752688171,
"train_probe_calibration/coverage@20%": 0.8738239247311829,
"train_probe_calibration/coverage@25%": 0.9529569892473119,
"train_probe_calibration/coverage@30%": 0.9791666666666666,
"train_probe_calibration/coverage@5%": 0.363239247311828,
"train_probe_calibration/ece": 0.22604252352150536,
"train_probe_calibration/mean_confidence": 0.5218310987903226,
"train_probe_completions/clipped_ratio": 0.005208333333333315,
"train_probe_completions/max_length": 3275.0,
"train_probe_completions/max_terminated_length": 3275.0,
"train_probe_completions/mean_length": 1149.7640787760417,
"train_probe_completions/mean_terminated_length": 1155.8470865885417,
"train_probe_completions/min_length": 91.83333333333333,
"train_probe_completions/min_terminated_length": 190.16666666666666,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 1692632999.0,
"train_probe_reward": 0.9475771685441335,
"train_probe_reward_std": 0.21386076509952545,
"train_probe_rewards/accuracy_reward": 0.7439236144224802,
"train_probe_rewards/brier_reward": 0.8085533181826273,
"train_probe_rewards/confidence_uniqueness_reward": 0.8970864415168762,
"train_probe_rewards/format_reward": 0.9947916567325592,
"train_probe_rewards/frontier_coverage_0": -0.004265061909488092,
"train_probe_rewards/frontier_coverage_1": -0.004265061909488092,
"train_probe_rewards/frontier_coverage_10": 0.0401507547746102,
"train_probe_rewards/frontier_coverage_15": 0.087270587682724,
"train_probe_rewards/frontier_coverage_20": 0.15156510472297668,
"train_probe_rewards/frontier_coverage_25": 0.23263747741778693,
"train_probe_rewards/frontier_coverage_5": -0.004163547380206485,
"train_probe_rewards/frontier_entropy_batch_reward": -0.9947916567325592,
"train_probe_runtime": 207.3917,
"train_probe_samples_per_second": 4.822,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.36865234375,
"train_probe_signal/accuracy_reward/group_std_mean": 0.4333516408999761,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8816647529602051,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.184326171875,
"train_probe_signal/advantage_abs_mean": 0.8192966183026632,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.1771583134929339,
"train_probe_signal/advantage_pre_scale_std": 0.21326116969188055,
"train_probe_signal/advantage_std": 0.9863499303658804,
"train_probe_signal/brier_reward/centered_abs_mean": 0.17456327378749847,
"train_probe_signal/brier_reward/group_std_mean": 0.2288526544968287,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0834679293135802,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.017456327254573505,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04140195933481058,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.062413097048799195,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019774133029083412,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004140196174072723,
"train_probe_signal/format_reward/centered_abs_mean": 0.010091145522892475,
"train_probe_signal/format_reward/group_std_mean": 0.02946278266608715,
"train_probe_signal/format_reward/group_zero_std_frac": 0.8333333432674408,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.022984805206457775,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0050455727614462376,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.3087007204691569,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.42339272300402325,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.021187719888985157,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0044144203420728445,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3087007204691569,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.42339272300402325,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.021187719888985157,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0044144203420728445,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.09169654672344525,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.12483824168642361,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006285120112200578,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013112605665810406,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.09570210054516792,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.12248214582602183,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006561545344690482,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013685400481335819,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.1610035002231598,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.19909277806679407,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.011035082396119833,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023023500107228756,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.23792067666848501,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.2904689262310664,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.016299090658624966,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034022655648489795,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3079666793346405,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.4224870850642522,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02113716086993615,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00440392301728328,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.010091145522892475,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.02946278266608715,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8333333432674408,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.004596961506952842,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0010091145910943549,
"train_probe_steps_per_second": 0.029
},
{
"calibration/aurc": 0.08886764164430172,
"calibration/batch_distribution_entropy": 0.9755997519212493,
"calibration/buffer_distribution_entropy": 0.9844547186954102,
"calibration/confidence_entropy": 0.47347940093345214,
"calibration/coverage@0%": 0.2267192725398695,
"calibration/coverage@1%": 0.28414543623541816,
"calibration/coverage@10%": 0.6487030533794279,
"calibration/coverage@15%": 0.7547508466709868,
"calibration/coverage@20%": 0.8620347968052193,
"calibration/coverage@25%": 0.9512310997257731,
"calibration/coverage@30%": 0.9786096256684491,
"calibration/coverage@5%": 0.4562793073317689,
"calibration/ece": 0.2560435482025544,
"calibration/mean_confidence": 0.4828333317576958,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013107638888888884,
"completions/max_length": 4042.2,
"completions/max_terminated_length": 4042.2,
"completions/mean_length": 1245.147509765625,
"completions/mean_terminated_length": 1262.2794189453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 214.0,
"epoch": 1.5735928300896238,
"grad_norm": 0.0022649674210697412,
"learning_rate": 2.3137019230769236e-06,
"loss": -0.0295,
"num_tokens": 1710094378.0,
"reward": 1.0154491662979126,
"reward_std": 0.12489555925130844,
"rewards/accuracy_reward": 0.735156238079071,
"rewards/brier_reward": 0.7945853352546692,
"rewards/confidence_uniqueness_reward": 0.9397654891014099,
"rewards/format_reward": 0.9868923544883728,
"rewards/frontier_coverage_0": -0.013256353419274091,
"rewards/frontier_coverage_1": -0.013256353419274091,
"rewards/frontier_coverage_10": 0.035689426213502885,
"rewards/frontier_coverage_15": 0.08503572195768357,
"rewards/frontier_coverage_20": 0.15036341547966003,
"rewards/frontier_coverage_25": 0.23061252534389495,
"rewards/frontier_coverage_5": -0.013073560688644648,
"rewards/frontier_entropy_batch_reward": -0.2561847805976868,
"signal/accuracy_reward/centered_abs_mean": 0.13704969435930253,
"signal/accuracy_reward/group_std_mean": 0.18030084669589996,
"signal/accuracy_reward/group_zero_std_frac": 0.4944444537162781,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.976815402507782,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06852484717965127,
"signal/advantage_abs_mean": 0.7512982010841369,
"signal/advantage_pre_scale_abs_mean": 0.09129708409309387,
"signal/advantage_pre_scale_std": 0.1536231279373169,
"signal/advantage_std": 0.9830477714538575,
"signal/brier_reward/centered_abs_mean": 0.12865829318761826,
"signal/brier_reward/group_std_mean": 0.1670507937669754,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1841784566640854,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012865828722715378,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03297282736748457,
"signal/confidence_uniqueness_reward/group_std_mean": 0.054193584248423576,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0467866700142622,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032972827553749084,
"signal/format_reward/centered_abs_mean": 0.02270507828798145,
"signal/format_reward/group_std_mean": 0.04204718470573425,
"signal/format_reward/group_zero_std_frac": 0.8333333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1594323130324483,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011352539143990726,
"signal/frontier_coverage_0/centered_abs_mean": 0.18524830043315887,
"signal/frontier_coverage_0/group_std_mean": 0.23942944705486296,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03790371045470238,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002649050671607256,
"signal/frontier_coverage_1/centered_abs_mean": 0.18524830043315887,
"signal/frontier_coverage_1/group_std_mean": 0.23942944705486296,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03790371045470238,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002649050671607256,
"signal/frontier_coverage_10/centered_abs_mean": 0.06545519232749938,
"signal/frontier_coverage_10/group_std_mean": 0.08360625356435776,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013402448035776615,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009360092226415873,
"signal/frontier_coverage_15/centered_abs_mean": 0.07041083574295044,
"signal/frontier_coverage_15/group_std_mean": 0.08784731775522232,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014487495459616185,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001006874954327941,
"signal/frontier_coverage_20/centered_abs_mean": 0.0951210230588913,
"signal/frontier_coverage_20/group_std_mean": 0.1187201201915741,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0196021169424057,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001360230566933751,
"signal/frontier_coverage_25/centered_abs_mean": 0.1295736938714981,
"signal/frontier_coverage_25/group_std_mean": 0.16238105595111846,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02669799067080021,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018529037246480585,
"signal/frontier_coverage_5/centered_abs_mean": 0.18478002846240998,
"signal/frontier_coverage_5/group_std_mean": 0.2388526976108551,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03780748248100281,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002642354369163513,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3182023406028748,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3871363937854767,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.45689463019371035,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03182023465633392,
"step": 655
},
{
"calibration/aurc": 0.09533119410246484,
"calibration/batch_distribution_entropy": 0.9460154236206373,
"calibration/buffer_distribution_entropy": 0.9849437384133127,
"calibration/confidence_entropy": 0.4857657950035287,
"calibration/coverage@0%": 0.1084363058159143,
"calibration/coverage@1%": 0.11890462537514294,
"calibration/coverage@10%": 0.5864796812562145,
"calibration/coverage@15%": 0.8011386517845708,
"calibration/coverage@20%": 0.9112986932632948,
"calibration/coverage@25%": 0.9652162001735081,
"calibration/coverage@30%": 0.9883977900552485,
"calibration/coverage@5%": 0.3859965976927132,
"calibration/ece": 0.15954822888034606,
"calibration/mean_confidence": 0.6012927915175033,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.029861111111111116,
"completions/max_length": 4036.6,
"completions/max_terminated_length": 4036.6,
"completions/mean_length": 1316.7138916015624,
"completions/mean_terminated_length": 1357.5852783203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 223.2,
"epoch": 1.5855926800914988,
"grad_norm": 0.0021101038437336683,
"learning_rate": 2.283653846153846e-06,
"loss": -0.0947,
"num_tokens": 1728357482.0,
"reward": 0.9971134901046753,
"reward_std": 0.1492151975631714,
"rewards/accuracy_reward": 0.7189236044883728,
"rewards/brier_reward": 0.8177730441093445,
"rewards/confidence_uniqueness_reward": 0.9205069661140441,
"rewards/format_reward": 0.9701388835906982,
"rewards/frontier_coverage_0": 0.018412799527868628,
"rewards/frontier_coverage_1": 0.018412799527868628,
"rewards/frontier_coverage_10": 0.04661319591104984,
"rewards/frontier_coverage_15": 0.10446333140134811,
"rewards/frontier_coverage_20": 0.1796988695859909,
"rewards/frontier_coverage_25": 0.26911273002624514,
"rewards/frontier_coverage_5": 0.0184929336886853,
"rewards/frontier_entropy_batch_reward": -0.30615225434303284,
"signal/accuracy_reward/centered_abs_mean": 0.1266710117459297,
"signal/accuracy_reward/group_std_mean": 0.175567626953125,
"signal/accuracy_reward/group_zero_std_frac": 0.4638888895511627,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8586621403694152,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06333550587296485,
"signal/advantage_abs_mean": 0.7222093343734741,
"signal/advantage_pre_scale_abs_mean": 0.1041327103972435,
"signal/advantage_pre_scale_std": 0.18754582107067108,
"signal/advantage_std": 0.983126699924469,
"signal/brier_reward/centered_abs_mean": 0.12975584119558334,
"signal/brier_reward/group_std_mean": 0.17287274599075317,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1763177275657654,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012975584715604782,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05719666481018067,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09497420340776444,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.07740743607282638,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005719666136428714,
"signal/format_reward/centered_abs_mean": 0.04908854141831398,
"signal/format_reward/group_std_mean": 0.08599354475736617,
"signal/format_reward/group_zero_std_frac": 0.6722222208976746,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.331484454870224,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.02454427070915699,
"signal/frontier_coverage_0/centered_abs_mean": 0.14936257898807526,
"signal/frontier_coverage_0/group_std_mean": 0.1966366797685623,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.029026806727051734,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021358849480748177,
"signal/frontier_coverage_1/centered_abs_mean": 0.14936257898807526,
"signal/frontier_coverage_1/group_std_mean": 0.1966366797685623,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.029026806727051734,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021358849480748177,
"signal/frontier_coverage_10/centered_abs_mean": 0.05857866555452347,
"signal/frontier_coverage_10/group_std_mean": 0.07446140795946121,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.011402043513953686,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008376748883165419,
"signal/frontier_coverage_15/centered_abs_mean": 0.07696539610624313,
"signal/frontier_coverage_15/group_std_mean": 0.09492502957582474,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014994030632078648,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011006051674485207,
"signal/frontier_coverage_20/centered_abs_mean": 0.1082698255777359,
"signal/frontier_coverage_20/group_std_mean": 0.1335964471101761,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02107783704996109,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015482584713026881,
"signal/frontier_coverage_25/centered_abs_mean": 0.1468362033367157,
"signal/frontier_coverage_25/group_std_mean": 0.1822466194629669,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02857258655130863,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002099757781252265,
"signal/frontier_coverage_5/centered_abs_mean": 0.1490258753299713,
"signal/frontier_coverage_5/group_std_mean": 0.1962038218975067,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.028961464390158655,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021310700103640556,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.330526864528656,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39485923647880555,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.45007047057151794,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03305268660187721,
"step": 660
},
{
"calibration/aurc": 0.09601976406181807,
"calibration/batch_distribution_entropy": 0.9514238208166249,
"calibration/buffer_distribution_entropy": 0.9848316736675014,
"calibration/confidence_entropy": 0.47648112021445366,
"calibration/coverage@0%": 0.151898181384355,
"calibration/coverage@1%": 0.2935527003347923,
"calibration/coverage@10%": 0.5848106517141017,
"calibration/coverage@15%": 0.7099468030690537,
"calibration/coverage@20%": 0.8737425404944587,
"calibration/coverage@25%": 0.9047229326513214,
"calibration/coverage@30%": 0.9556436487638533,
"calibration/coverage@5%": 0.5147975095628313,
"calibration/ece": 0.2035095831173826,
"calibration/mean_confidence": 0.6139547713848655,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.06501736111111112,
"completions/max_length": 4070.4,
"completions/max_terminated_length": 4070.4,
"completions/mean_length": 1302.47275390625,
"completions/mean_terminated_length": 1393.196337890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 194.0,
"epoch": 1.5975925300933738,
"grad_norm": 0.0020804195664823055,
"learning_rate": 2.2536057692307694e-06,
"loss": -0.1813,
"num_tokens": 1746472944.0,
"reward": 0.9607110142707824,
"reward_std": 0.19843303859233857,
"rewards/accuracy_reward": 0.6991319417953491,
"rewards/brier_reward": 0.7799984812736511,
"rewards/confidence_uniqueness_reward": 0.8859006881713867,
"rewards/format_reward": 0.9348958492279053,
"rewards/frontier_coverage_0": 0.005472905747592449,
"rewards/frontier_coverage_1": 0.005472905747592449,
"rewards/frontier_coverage_10": 0.04270212613046169,
"rewards/frontier_coverage_15": 0.1018251746892929,
"rewards/frontier_coverage_20": 0.17534002363681794,
"rewards/frontier_coverage_25": 0.2618106693029404,
"rewards/frontier_coverage_5": 0.0055509466677904126,
"rewards/frontier_entropy_batch_reward": -0.314467066526413,
"signal/accuracy_reward/centered_abs_mean": 0.15463324785232543,
"signal/accuracy_reward/group_std_mean": 0.20535095334053038,
"signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8738274693489074,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07731662392616272,
"signal/advantage_abs_mean": 0.722335159778595,
"signal/advantage_pre_scale_abs_mean": 0.14205503165721894,
"signal/advantage_pre_scale_std": 0.24266441464424132,
"signal/advantage_std": 0.983333969116211,
"signal/brier_reward/centered_abs_mean": 0.152623775601387,
"signal/brier_reward/group_std_mean": 0.2023110032081604,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17406882345676422,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015262378007173538,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.10098161250352859,
"signal/confidence_uniqueness_reward/group_std_mean": 0.15298269987106322,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.11472240090370178,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010098160803318023,
"signal/format_reward/centered_abs_mean": 0.0970920130610466,
"signal/format_reward/group_std_mean": 0.1495143711566925,
"signal/format_reward/group_zero_std_frac": 0.49444445967674255,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.5503052711486817,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0485460065305233,
"signal/frontier_coverage_0/centered_abs_mean": 0.15222309529781342,
"signal/frontier_coverage_0/group_std_mean": 0.19828734695911407,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.024861392751336097,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002176790265366435,
"signal/frontier_coverage_1/centered_abs_mean": 0.15222309529781342,
"signal/frontier_coverage_1/group_std_mean": 0.19828734695911407,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.024861392751336097,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002176790265366435,
"signal/frontier_coverage_10/centered_abs_mean": 0.05907100513577461,
"signal/frontier_coverage_10/group_std_mean": 0.07500900477170944,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.009708347730338573,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008447154192253947,
"signal/frontier_coverage_15/centered_abs_mean": 0.07548296004533768,
"signal/frontier_coverage_15/group_std_mean": 0.09401365518569946,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01242184229195118,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010794063098728656,
"signal/frontier_coverage_20/centered_abs_mean": 0.10607408285140991,
"signal/frontier_coverage_20/group_std_mean": 0.13229668736457825,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01742022316902876,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015168593730777503,
"signal/frontier_coverage_25/centered_abs_mean": 0.14476778507232665,
"signal/frontier_coverage_25/group_std_mean": 0.1815311759710312,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.023732788860797882,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020701792556792496,
"signal/frontier_coverage_5/centered_abs_mean": 0.15183787047863007,
"signal/frontier_coverage_5/group_std_mean": 0.19780696630477906,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02479843869805336,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002171281585469842,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3288115680217743,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39475221037864683,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.37834363579750063,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03288115821778774,
"step": 665
},
{
"calibration/aurc": 0.03806984555271315,
"calibration/batch_distribution_entropy": 0.9203345601826488,
"calibration/buffer_distribution_entropy": 0.9832592180175089,
"calibration/confidence_entropy": 0.4811035553222265,
"calibration/coverage@0%": 0.2287304922155279,
"calibration/coverage@1%": 0.3258552462180947,
"calibration/coverage@10%": 0.9037652661992339,
"calibration/coverage@15%": 0.9642066683779698,
"calibration/coverage@20%": 0.9946949602122016,
"calibration/coverage@25%": 1.0,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.7852124285822694,
"calibration/ece": 0.19911374108801527,
"calibration/mean_confidence": 0.6586080802205992,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03255208333333333,
"completions/max_length": 4073.6,
"completions/max_terminated_length": 4073.6,
"completions/mean_length": 1383.228759765625,
"completions/mean_terminated_length": 1428.3281982421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 273.6,
"epoch": 1.6095923800952487,
"grad_norm": 0.0021786438301205635,
"learning_rate": 2.2235576923076924e-06,
"loss": -0.0947,
"num_tokens": 1765514843.0,
"reward": 0.9914361715316773,
"reward_std": 0.1526610553264618,
"rewards/accuracy_reward": 0.7119791746139527,
"rewards/brier_reward": 0.8093498826026917,
"rewards/confidence_uniqueness_reward": 0.918257987499237,
"rewards/format_reward": 0.9673611164093018,
"rewards/frontier_coverage_0": 0.01678692139685154,
"rewards/frontier_coverage_1": 0.01678692139685154,
"rewards/frontier_coverage_10": 0.04643819592893124,
"rewards/frontier_coverage_15": 0.10272331386804581,
"rewards/frontier_coverage_20": 0.17622337937355043,
"rewards/frontier_coverage_25": 0.2643666982650757,
"rewards/frontier_coverage_5": 0.01684805955737829,
"rewards/frontier_entropy_batch_reward": -0.3014924913644791,
"signal/accuracy_reward/centered_abs_mean": 0.1404839426279068,
"signal/accuracy_reward/group_std_mean": 0.18494315445423126,
"signal/accuracy_reward/group_zero_std_frac": 0.475,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.964840543270111,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0702419713139534,
"signal/advantage_abs_mean": 0.7429413557052612,
"signal/advantage_pre_scale_abs_mean": 0.10971628427505493,
"signal/advantage_pre_scale_std": 0.19166867136955262,
"signal/advantage_std": 0.9831172704696656,
"signal/brier_reward/centered_abs_mean": 0.12622719258069992,
"signal/brier_reward/group_std_mean": 0.16899799108505248,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17211044132709502,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012622719258069992,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05944109708070755,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0957409456372261,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08047307804226875,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005944109987467527,
"signal/format_reward/centered_abs_mean": 0.05112847089767456,
"signal/format_reward/group_std_mean": 0.08646547794342041,
"signal/format_reward/group_zero_std_frac": 0.6805555701255799,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.34511254727840424,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.02556423544883728,
"signal/frontier_coverage_0/centered_abs_mean": 0.1582293063402176,
"signal/frontier_coverage_0/group_std_mean": 0.20548607409000397,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030993625149130823,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022626791149377825,
"signal/frontier_coverage_1/centered_abs_mean": 0.1582293063402176,
"signal/frontier_coverage_1/group_std_mean": 0.20548607409000397,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030993625149130823,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022626791149377825,
"signal/frontier_coverage_10/centered_abs_mean": 0.05960306078195572,
"signal/frontier_coverage_10/group_std_mean": 0.07558953166007995,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.011655074357986451,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008523237425833941,
"signal/frontier_coverage_15/centered_abs_mean": 0.0737259179353714,
"signal/frontier_coverage_15/group_std_mean": 0.09147704541683196,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014405173435807227,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001054280623793602,
"signal/frontier_coverage_20/centered_abs_mean": 0.102665276825428,
"signal/frontier_coverage_20/group_std_mean": 0.1282331794500351,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020072196424007416,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014681134838610888,
"signal/frontier_coverage_25/centered_abs_mean": 0.14084968566894532,
"signal/frontier_coverage_25/group_std_mean": 0.1767154097557068,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027554494515061378,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002014150540344417,
"signal/frontier_coverage_5/centered_abs_mean": 0.15790065228939057,
"signal/frontier_coverage_5/group_std_mean": 0.2050785392522812,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030929455906152724,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022579793119803073,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32375689744949343,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3865199089050293,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.44135147929191587,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03237569145858288,
"step": 670
},
{
"calibration/aurc": 0.03729969376431475,
"calibration/batch_distribution_entropy": 0.9389808737258891,
"calibration/buffer_distribution_entropy": 0.9826168673707685,
"calibration/confidence_entropy": 0.48776771095909294,
"calibration/coverage@0%": 0.31871109818981125,
"calibration/coverage@1%": 0.5034221627249227,
"calibration/coverage@10%": 0.8701168438794713,
"calibration/coverage@15%": 0.9513196642868345,
"calibration/coverage@20%": 0.995822454308094,
"calibration/coverage@25%": 1.0,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.7422882641289732,
"calibration/ece": 0.2569934348082308,
"calibration/mean_confidence": 0.6003690197412586,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.016145833333333325,
"completions/max_length": 4074.0,
"completions/max_terminated_length": 4074.0,
"completions/mean_length": 1375.4995849609375,
"completions/mean_terminated_length": 1399.0270751953126,
"completions/min_length": 0.0,
"completions/min_terminated_length": 274.4,
"epoch": 1.6215922300971237,
"grad_norm": 0.002223934279754758,
"learning_rate": 2.1935096153846157e-06,
"loss": -0.0469,
"num_tokens": 1784455926.0,
"reward": 1.0157193899154664,
"reward_std": 0.12572815269231796,
"rewards/accuracy_reward": 0.7405381917953491,
"rewards/brier_reward": 0.8042338967323304,
"rewards/confidence_uniqueness_reward": 0.9352299451828003,
"rewards/format_reward": 0.9837673783302308,
"rewards/frontier_coverage_0": -0.008519930252805352,
"rewards/frontier_coverage_1": -0.008519930252805352,
"rewards/frontier_coverage_10": 0.03989522792398929,
"rewards/frontier_coverage_15": 0.09447171092033387,
"rewards/frontier_coverage_20": 0.16575570404529572,
"rewards/frontier_coverage_25": 0.25334414541721345,
"rewards/frontier_coverage_5": -0.008372036268701777,
"rewards/frontier_entropy_batch_reward": -0.27930985391139984,
"signal/accuracy_reward/centered_abs_mean": 0.12795681357383729,
"signal/accuracy_reward/group_std_mean": 0.17028040885925294,
"signal/accuracy_reward/group_zero_std_frac": 0.5138889014720917,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9479888319969177,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06397840678691864,
"signal/advantage_abs_mean": 0.7458012700080872,
"signal/advantage_pre_scale_abs_mean": 0.09115839749574661,
"signal/advantage_pre_scale_std": 0.15917887091636657,
"signal/advantage_std": 0.9829484939575195,
"signal/brier_reward/centered_abs_mean": 0.1257360503077507,
"signal/brier_reward/group_std_mean": 0.16337400376796724,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18875263929367064,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.0125736054033041,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03575787842273712,
"signal/confidence_uniqueness_reward/group_std_mean": 0.059302129596471784,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.052689623832702634,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035757880192250012,
"signal/format_reward/centered_abs_mean": 0.025906032882630825,
"signal/format_reward/group_std_mean": 0.047686302289366725,
"signal/format_reward/group_zero_std_frac": 0.8083333492279052,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.18549492359161376,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012953016441315413,
"signal/frontier_coverage_0/centered_abs_mean": 0.1635303020477295,
"signal/frontier_coverage_0/group_std_mean": 0.2119036942720413,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035292362421751024,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00233848316129297,
"signal/frontier_coverage_1/centered_abs_mean": 0.1635303020477295,
"signal/frontier_coverage_1/group_std_mean": 0.2119036942720413,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035292362421751024,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00233848316129297,
"signal/frontier_coverage_10/centered_abs_mean": 0.06136737838387489,
"signal/frontier_coverage_10/group_std_mean": 0.07794718146324157,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013388168439269066,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008775535272434354,
"signal/frontier_coverage_15/centered_abs_mean": 0.07500097304582595,
"signal/frontier_coverage_15/group_std_mean": 0.09282598346471786,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016546625830233096,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010725139174610377,
"signal/frontier_coverage_20/centered_abs_mean": 0.10340526103973388,
"signal/frontier_coverage_20/group_std_mean": 0.1284177213907242,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02281036227941513,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001478695240803063,
"signal/frontier_coverage_25/centered_abs_mean": 0.1403847485780716,
"signal/frontier_coverage_25/group_std_mean": 0.17535466849803924,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030875445157289506,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020075018983334303,
"signal/frontier_coverage_5/centered_abs_mean": 0.16319799721240996,
"signal/frontier_coverage_5/group_std_mean": 0.21148067712783813,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035220272839069366,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002333731343969703,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31295692920684814,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3812874913215637,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.482098913192749,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03129569143056869,
"step": 675
},
{
"calibration/aurc": 0.14541435904658284,
"calibration/batch_distribution_entropy": 0.9751293794401581,
"calibration/buffer_distribution_entropy": 0.9829700635712373,
"calibration/confidence_entropy": 0.4906589674602201,
"calibration/coverage@0%": 0.088201334986161,
"calibration/coverage@1%": 0.13090966831949433,
"calibration/coverage@10%": 0.4090633628879498,
"calibration/coverage@15%": 0.65926727430129,
"calibration/coverage@20%": 0.7996981703743204,
"calibration/coverage@25%": 0.8428572417948488,
"calibration/coverage@30%": 0.8786738785658386,
"calibration/coverage@5%": 0.29535824941354677,
"calibration/ece": 0.16559922901748228,
"calibration/mean_confidence": 0.5466112312017535,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0064236111111110935,
"completions/max_length": 3937.8,
"completions/max_terminated_length": 3937.8,
"completions/mean_length": 1338.471533203125,
"completions/mean_terminated_length": 1347.26357421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 309.6,
"epoch": 1.6335920800989987,
"grad_norm": 0.002288751769810915,
"learning_rate": 2.1634615384615387e-06,
"loss": -0.0171,
"num_tokens": 1803013230.0,
"reward": 1.0145124554634095,
"reward_std": 0.1102727472782135,
"rewards/accuracy_reward": 0.7199652910232544,
"rewards/brier_reward": 0.8168349623680115,
"rewards/confidence_uniqueness_reward": 0.9457002997398376,
"rewards/format_reward": 0.9935763835906982,
"rewards/frontier_coverage_0": 0.013051034219097346,
"rewards/frontier_coverage_1": 0.013051034219097346,
"rewards/frontier_coverage_10": 0.045426635444164275,
"rewards/frontier_coverage_15": 0.0962330624461174,
"rewards/frontier_coverage_20": 0.16461943387985228,
"rewards/frontier_coverage_25": 0.2484707236289978,
"rewards/frontier_coverage_5": 0.013126900864881464,
"rewards/frontier_entropy_batch_reward": -0.27005818486213684,
"signal/accuracy_reward/centered_abs_mean": 0.13260633796453475,
"signal/accuracy_reward/group_std_mean": 0.1719220072031021,
"signal/accuracy_reward/group_zero_std_frac": 0.522222238779068,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0306506514549256,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06630316898226737,
"signal/advantage_abs_mean": 0.7687285304069519,
"signal/advantage_pre_scale_abs_mean": 0.08383222371339798,
"signal/advantage_pre_scale_std": 0.136709526181221,
"signal/advantage_std": 0.9829292297363281,
"signal/brier_reward/centered_abs_mean": 0.11948189288377761,
"signal/brier_reward/group_std_mean": 0.1531725823879242,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1861775755882263,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011948189325630664,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02247364744544029,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03589537590742111,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0348218347877264,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002247364795766771,
"signal/format_reward/centered_abs_mean": 0.010948350746184587,
"signal/format_reward/group_std_mean": 0.02192477509379387,
"signal/format_reward/group_zero_std_frac": 0.9055555701255799,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08419957533478736,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0054741753730922936,
"signal/frontier_coverage_0/centered_abs_mean": 0.17799755036830903,
"signal/frontier_coverage_0/group_std_mean": 0.22672839164733888,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.039601098746061325,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025453649461269377,
"signal/frontier_coverage_1/centered_abs_mean": 0.17799755036830903,
"signal/frontier_coverage_1/group_std_mean": 0.22672839164733888,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.039601098746061325,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025453649461269377,
"signal/frontier_coverage_10/centered_abs_mean": 0.0633821927011013,
"signal/frontier_coverage_10/group_std_mean": 0.07961026728153228,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014111051522195339,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009063653764314949,
"signal/frontier_coverage_15/centered_abs_mean": 0.07539696991443634,
"signal/frontier_coverage_15/group_std_mean": 0.09288933426141739,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01682089865207672,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010781767079606653,
"signal/frontier_coverage_20/centered_abs_mean": 0.1028669998049736,
"signal/frontier_coverage_20/group_std_mean": 0.12718904614448548,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02296198531985283,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001470998045988381,
"signal/frontier_coverage_25/centered_abs_mean": 0.13958117067813874,
"signal/frontier_coverage_25/group_std_mean": 0.17355575263500214,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03115156516432762,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001996010704897344,
"signal/frontier_coverage_5/centered_abs_mean": 0.17765924632549285,
"signal/frontier_coverage_5/group_std_mean": 0.22631218731403352,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03952649161219597,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002540527284145355,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.318440192937851,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3866540253162384,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4967281222343445,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03184402026236057,
"step": 680
},
{
"calibration/aurc": 0.15669870698176264,
"calibration/batch_distribution_entropy": 0.9750650248851596,
"calibration/buffer_distribution_entropy": 0.9835866869663812,
"calibration/confidence_entropy": 0.502090886950683,
"calibration/coverage@0%": 0.10573188642297648,
"calibration/coverage@1%": 0.11146105308964316,
"calibration/coverage@10%": 0.398634682332463,
"calibration/coverage@15%": 0.4460699521322889,
"calibration/coverage@20%": 0.6534119342906874,
"calibration/coverage@25%": 0.8571080831157529,
"calibration/coverage@30%": 0.9274749782419496,
"calibration/coverage@5%": 0.2270860530896432,
"calibration/ece": 0.18874397584720412,
"calibration/mean_confidence": 0.5418572219307551,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00269097222222221,
"completions/max_length": 3918.6,
"completions/max_terminated_length": 3918.6,
"completions/mean_length": 1182.0064208984375,
"completions/mean_terminated_length": 1185.24580078125,
"completions/min_length": 55.4,
"completions/min_terminated_length": 240.6,
"epoch": 1.6455919301008737,
"grad_norm": 0.0022894926369190216,
"learning_rate": 2.1334134615384616e-06,
"loss": -0.0028,
"num_tokens": 1819744696.0,
"reward": 1.025932240486145,
"reward_std": 0.09519761502742767,
"rewards/accuracy_reward": 0.7417534589767456,
"rewards/brier_reward": 0.8289115786552429,
"rewards/confidence_uniqueness_reward": 0.9471697688102723,
"rewards/format_reward": 0.9973090291023254,
"rewards/frontier_coverage_0": 0.0073368697427213195,
"rewards/frontier_coverage_1": 0.0073368697427213195,
"rewards/frontier_coverage_10": 0.04692419543862343,
"rewards/frontier_coverage_15": 0.10556664913892747,
"rewards/frontier_coverage_20": 0.1815480649471283,
"rewards/frontier_coverage_25": 0.2728467047214508,
"rewards/frontier_coverage_5": 0.007369892485439777,
"rewards/frontier_entropy_batch_reward": -0.30200849175453187,
"signal/accuracy_reward/centered_abs_mean": 0.10688476413488388,
"signal/accuracy_reward/group_std_mean": 0.14036836326122284,
"signal/accuracy_reward/group_zero_std_frac": 0.6027777791023254,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9464346647262574,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05344238206744194,
"signal/advantage_abs_mean": 0.7758130669593811,
"signal/advantage_pre_scale_abs_mean": 0.07317983582615853,
"signal/advantage_pre_scale_std": 0.1202566534280777,
"signal/advantage_std": 0.982705807685852,
"signal/brier_reward/centered_abs_mean": 0.1028501957654953,
"signal/brier_reward/group_std_mean": 0.13237460404634477,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18268101513385773,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010285019874572754,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01821411997079849,
"signal/confidence_uniqueness_reward/group_std_mean": 0.025956546515226366,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03234106935560703,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001821412146091461,
"signal/format_reward/centered_abs_mean": 0.004866536427289248,
"signal/format_reward/group_std_mean": 0.009479801915585995,
"signal/format_reward/group_zero_std_frac": 0.9611111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.04194744750857353,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.002433268213644624,
"signal/frontier_coverage_0/centered_abs_mean": 0.15280999839305878,
"signal/frontier_coverage_0/group_std_mean": 0.1967985898256302,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.038797355443239215,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002185182925313711,
"signal/frontier_coverage_1/centered_abs_mean": 0.15280999839305878,
"signal/frontier_coverage_1/group_std_mean": 0.1967985898256302,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.038797355443239215,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002185182925313711,
"signal/frontier_coverage_10/centered_abs_mean": 0.05912318155169487,
"signal/frontier_coverage_10/group_std_mean": 0.07436081171035766,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01506075393408537,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008454615017399192,
"signal/frontier_coverage_15/centered_abs_mean": 0.0739786371588707,
"signal/frontier_coverage_15/group_std_mean": 0.09121221601963043,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018956642411649228,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00105789452791214,
"signal/frontier_coverage_20/centered_abs_mean": 0.10158449411392212,
"signal/frontier_coverage_20/group_std_mean": 0.12596147805452346,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0260475505143404,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014526582323014735,
"signal/frontier_coverage_25/centered_abs_mean": 0.1375407963991165,
"signal/frontier_coverage_25/group_std_mean": 0.17120290398597718,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.035241054370999336,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019668332999572156,
"signal/frontier_coverage_5/centered_abs_mean": 0.15252489149570464,
"signal/frontier_coverage_5/group_std_mean": 0.19644558429718018,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.038724697381258014,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002181106014177203,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32505291104316714,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39162933826446533,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5808179020881653,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03250529170036316,
"step": 685
},
{
"calibration/aurc": 0.1229704696194999,
"calibration/batch_distribution_entropy": 0.976383856114509,
"calibration/buffer_distribution_entropy": 0.9839021225946876,
"calibration/confidence_entropy": 0.48749734732868155,
"calibration/coverage@0%": 0.07091239850040326,
"calibration/coverage@1%": 0.1573707318337366,
"calibration/coverage@10%": 0.5240379894422194,
"calibration/coverage@15%": 0.6932473648266875,
"calibration/coverage@20%": 0.7662947364314063,
"calibration/coverage@25%": 0.8341147506481847,
"calibration/coverage@30%": 0.9254281978638378,
"calibration/coverage@5%": 0.3409230055500116,
"calibration/ece": 0.1518131361828975,
"calibration/mean_confidence": 0.5588064428822697,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0006944444444444642,
"completions/max_length": 3496.6,
"completions/max_terminated_length": 3496.6,
"completions/mean_length": 1107.756689453125,
"completions/mean_terminated_length": 1108.5136474609376,
"completions/min_length": 84.4,
"completions/min_terminated_length": 236.2,
"epoch": 1.6575917801027487,
"grad_norm": 0.002715888200327754,
"learning_rate": 2.103365384615385e-06,
"loss": 0.0081,
"num_tokens": 1835604645.0,
"reward": 1.019283664226532,
"reward_std": 0.10603798031806946,
"rewards/accuracy_reward": 0.7253472328186035,
"rewards/brier_reward": 0.8144157767295838,
"rewards/confidence_uniqueness_reward": 0.9509435057640075,
"rewards/format_reward": 0.9993055582046508,
"rewards/frontier_coverage_0": 0.0016073930077254773,
"rewards/frontier_coverage_1": 0.0016073930077254773,
"rewards/frontier_coverage_10": 0.042944446206092834,
"rewards/frontier_coverage_15": 0.09877827614545823,
"rewards/frontier_coverage_20": 0.16919994354248047,
"rewards/frontier_coverage_25": 0.2534219026565552,
"rewards/frontier_coverage_5": 0.0017386081628501416,
"rewards/frontier_entropy_batch_reward": -0.27719637751579285,
"signal/accuracy_reward/centered_abs_mean": 0.1352647602558136,
"signal/accuracy_reward/group_std_mean": 0.17324215471744536,
"signal/accuracy_reward/group_zero_std_frac": 0.5277777791023255,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0748308062553407,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0676323801279068,
"signal/advantage_abs_mean": 0.7841925501823426,
"signal/advantage_pre_scale_abs_mean": 0.08377051204442978,
"signal/advantage_pre_scale_std": 0.12926071733236313,
"signal/advantage_std": 0.9828932642936706,
"signal/brier_reward/centered_abs_mean": 0.11323733180761338,
"signal/brier_reward/group_std_mean": 0.1463605895638466,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18019480109214783,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011323734000325204,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014994461461901665,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02000431716442108,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02399727888405323,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001499446202069521,
"signal/format_reward/centered_abs_mean": 0.001312933990266174,
"signal/format_reward/group_std_mean": 0.0030315483920276163,
"signal/format_reward/group_zero_std_frac": 0.9861111044883728,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010730944946408272,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000656466995133087,
"signal/frontier_coverage_0/centered_abs_mean": 0.16635308563709258,
"signal/frontier_coverage_0/group_std_mean": 0.21567732095718384,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.037881956249475476,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023788490798324347,
"signal/frontier_coverage_1/centered_abs_mean": 0.16635308563709258,
"signal/frontier_coverage_1/group_std_mean": 0.21567732095718384,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.037881956249475476,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023788490798324347,
"signal/frontier_coverage_10/centered_abs_mean": 0.062270589917898175,
"signal/frontier_coverage_10/group_std_mean": 0.07897855192422867,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014190655015408993,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008904694463126361,
"signal/frontier_coverage_15/centered_abs_mean": 0.07686868906021119,
"signal/frontier_coverage_15/group_std_mean": 0.09543234705924988,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017530930414795875,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010992222232744098,
"signal/frontier_coverage_20/centered_abs_mean": 0.10772657692432404,
"signal/frontier_coverage_20/group_std_mean": 0.133695587515831,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024576536566019058,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015404900535941125,
"signal/frontier_coverage_25/centered_abs_mean": 0.14818452894687653,
"signal/frontier_coverage_25/group_std_mean": 0.1841533213853836,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03380677923560142,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002119038766250014,
"signal/frontier_coverage_5/centered_abs_mean": 0.16605048775672912,
"signal/frontier_coverage_5/group_std_mean": 0.21530235409736634,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03781315460801125,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002374521875753999,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3271877884864807,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3948762595653534,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5215847194194794,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03271878063678742,
"step": 690
},
{
"calibration/aurc": 0.12016505696814503,
"calibration/batch_distribution_entropy": 0.9630713458254473,
"calibration/buffer_distribution_entropy": 0.9853825829045704,
"calibration/confidence_entropy": 0.4604225348032044,
"calibration/coverage@0%": 0.10989583333333333,
"calibration/coverage@1%": 0.2588541666666667,
"calibration/coverage@10%": 0.5395833333333333,
"calibration/coverage@15%": 0.6171874999999999,
"calibration/coverage@20%": 0.7697916666666667,
"calibration/coverage@25%": 0.8526041666666668,
"calibration/coverage@30%": 0.9010416666666667,
"calibration/coverage@5%": 0.4583333333333333,
"calibration/ece": 0.1897616322916667,
"calibration/mean_confidence": 0.5404426177083334,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005208333333333482,
"completions/max_length": 3440.2,
"completions/max_terminated_length": 3440.2,
"completions/mean_length": 1030.8426391601563,
"completions/mean_terminated_length": 1031.3654174804688,
"completions/min_length": 136.4,
"completions/min_terminated_length": 261.4,
"epoch": 1.6695916301046236,
"grad_norm": 0.0026529114693403244,
"learning_rate": 2.073317307692308e-06,
"loss": 0.0088,
"num_tokens": 1850554288.0,
"reward": 1.0253738403320312,
"reward_std": 0.10471922159194946,
"rewards/accuracy_reward": 0.7399305582046509,
"rewards/brier_reward": 0.8257181644439697,
"rewards/confidence_uniqueness_reward": 0.9484269022941589,
"rewards/format_reward": 0.9994791626930237,
"rewards/frontier_coverage_0": 0.005620070081204176,
"rewards/frontier_coverage_1": 0.005620070081204176,
"rewards/frontier_coverage_10": 0.04614498615264893,
"rewards/frontier_coverage_15": 0.10905924439430237,
"rewards/frontier_coverage_20": 0.18669271767139434,
"rewards/frontier_coverage_25": 0.2785437643527985,
"rewards/frontier_coverage_5": 0.0057056773453950885,
"rewards/frontier_entropy_batch_reward": -0.3086018800735474,
"signal/accuracy_reward/centered_abs_mean": 0.13557942658662797,
"signal/accuracy_reward/group_std_mean": 0.18026112020015717,
"signal/accuracy_reward/group_zero_std_frac": 0.48055556416511536,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0554683685302735,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06778971329331399,
"signal/advantage_abs_mean": 0.7678684234619141,
"signal/advantage_pre_scale_abs_mean": 0.08026924282312393,
"signal/advantage_pre_scale_std": 0.1252484291791916,
"signal/advantage_std": 0.982914924621582,
"signal/brier_reward/centered_abs_mean": 0.10984794348478318,
"signal/brier_reward/group_std_mean": 0.14237151443958282,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17236720025539398,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010984793864190578,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01557443682104349,
"signal/confidence_uniqueness_reward/group_std_mean": 0.020670870319008827,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024543348327279092,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015574437333270908,
"signal/format_reward/centered_abs_mean": 0.0009982638759538532,
"signal/format_reward/group_std_mean": 0.0026473373174667357,
"signal/format_reward/group_zero_std_frac": 0.9861111044883728,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007815391756594181,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004991319379769266,
"signal/frontier_coverage_0/centered_abs_mean": 0.1692986935377121,
"signal/frontier_coverage_0/group_std_mean": 0.21854256391525267,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03790202885866165,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024209713097661733,
"signal/frontier_coverage_1/centered_abs_mean": 0.1692986935377121,
"signal/frontier_coverage_1/group_std_mean": 0.21854256391525267,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03790202885866165,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024209713097661733,
"signal/frontier_coverage_10/centered_abs_mean": 0.06352206021547317,
"signal/frontier_coverage_10/group_std_mean": 0.08006793260574341,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01428398210555315,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000908365473151207,
"signal/frontier_coverage_15/centered_abs_mean": 0.07578349262475967,
"signal/frontier_coverage_15/group_std_mean": 0.09459168761968613,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017029393836855887,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010837039677426218,
"signal/frontier_coverage_20/centered_abs_mean": 0.10363190919160843,
"signal/frontier_coverage_20/group_std_mean": 0.13097960501909256,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023263034224510194,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014819363364949822,
"signal/frontier_coverage_25/centered_abs_mean": 0.14058729410171508,
"signal/frontier_coverage_25/group_std_mean": 0.17922786474227906,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03153381682932377,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00201039828825742,
"signal/frontier_coverage_5/centered_abs_mean": 0.16897266507148742,
"signal/frontier_coverage_5/group_std_mean": 0.21812840402126313,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03782900050282478,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002416309108957648,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3316554367542267,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3980835318565369,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5211562156677246,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03316554352641106,
"step": 695
},
{
"calibration/aurc": 0.13793057252636853,
"calibration/batch_distribution_entropy": 0.9191704771321465,
"calibration/buffer_distribution_entropy": 0.9850459534761482,
"calibration/confidence_entropy": 0.48348108765589587,
"calibration/coverage@0%": 0.03594022687609075,
"calibration/coverage@1%": 0.11094022687609073,
"calibration/coverage@10%": 0.36704842931937176,
"calibration/coverage@15%": 0.5469213568935428,
"calibration/coverage@20%": 0.8864119764397905,
"calibration/coverage@25%": 0.9385416666666668,
"calibration/coverage@30%": 0.96875,
"calibration/coverage@5%": 0.21469240837696332,
"calibration/ece": 0.15457895792702878,
"calibration/mean_confidence": 0.6365902838432592,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000868055555555558,
"completions/max_length": 3035.0,
"completions/max_terminated_length": 3035.0,
"completions/mean_length": 993.9149169921875,
"completions/mean_terminated_length": 994.7564208984375,
"completions/min_length": 51.6,
"completions/min_terminated_length": 224.6,
"epoch": 1.6815914801064986,
"grad_norm": 0.0027931861113756895,
"learning_rate": 2.043269230769231e-06,
"loss": 0.0022,
"num_tokens": 1865124220.0,
"reward": 1.0046656370162963,
"reward_std": 0.09352846145629883,
"rewards/accuracy_reward": 0.706249988079071,
"rewards/brier_reward": 0.8287553548812866,
"rewards/confidence_uniqueness_reward": 0.9467250108718872,
"rewards/format_reward": 0.9991319417953491,
"rewards/frontier_coverage_0": 0.026872091740369797,
"rewards/frontier_coverage_1": 0.026872091740369797,
"rewards/frontier_coverage_10": 0.04779320433735847,
"rewards/frontier_coverage_15": 0.0996496319770813,
"rewards/frontier_coverage_20": 0.16897362768650054,
"rewards/frontier_coverage_25": 0.253493994474411,
"rewards/frontier_coverage_5": 0.026916111633181573,
"rewards/frontier_entropy_batch_reward": -0.34876567125320435,
"signal/accuracy_reward/centered_abs_mean": 0.0991970494389534,
"signal/accuracy_reward/group_std_mean": 0.1393287718296051,
"signal/accuracy_reward/group_zero_std_frac": 0.5694444417953491,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.842095923423767,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0495985247194767,
"signal/advantage_abs_mean": 0.7627769827842712,
"signal/advantage_pre_scale_abs_mean": 0.07055802345275879,
"signal/advantage_pre_scale_std": 0.11469898372888565,
"signal/advantage_std": 0.9827576041221618,
"signal/brier_reward/centered_abs_mean": 0.1017798662185669,
"signal/brier_reward/group_std_mean": 0.13196168690919877,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17585197389125823,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010177987068891526,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015178951062262058,
"signal/confidence_uniqueness_reward/group_std_mean": 0.020427386462688445,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026487966254353523,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015178951434791088,
"signal/format_reward/centered_abs_mean": 0.0015733506763353944,
"signal/format_reward/group_std_mean": 0.0034799596294760706,
"signal/format_reward/group_zero_std_frac": 0.9833333253860473,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014351568464189769,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0007866753381676972,
"signal/frontier_coverage_0/centered_abs_mean": 0.1307838648557663,
"signal/frontier_coverage_0/group_std_mean": 0.173773917555809,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.032213568314909936,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001870209281332791,
"signal/frontier_coverage_1/centered_abs_mean": 0.1307838648557663,
"signal/frontier_coverage_1/group_std_mean": 0.173773917555809,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.032213568314909936,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001870209281332791,
"signal/frontier_coverage_10/centered_abs_mean": 0.055519319325685504,
"signal/frontier_coverage_10/group_std_mean": 0.07080635875463485,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013711910881102084,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007939262664876878,
"signal/frontier_coverage_15/centered_abs_mean": 0.07911764830350876,
"signal/frontier_coverage_15/group_std_mean": 0.09775821417570114,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.019597242772579192,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001131382375024259,
"signal/frontier_coverage_20/centered_abs_mean": 0.1124587595462799,
"signal/frontier_coverage_20/group_std_mean": 0.13922121226787568,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027819440886378288,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016081602778285741,
"signal/frontier_coverage_25/centered_abs_mean": 0.1522460699081421,
"signal/frontier_coverage_25/group_std_mean": 0.1895580768585205,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.037604338675737384,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021771186031401156,
"signal/frontier_coverage_5/centered_abs_mean": 0.13061472475528718,
"signal/frontier_coverage_5/group_std_mean": 0.17355478703975677,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0321720227599144,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018677905201911927,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.330916690826416,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39551963210105895,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5743350386619568,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033091668784618375,
"step": 700
},
{
"epoch": 1.6815914801064986,
"eval_calibration/aurc": 0.18089802504255273,
"eval_calibration/batch_distribution_entropy": 0.9028164891686236,
"eval_calibration/buffer_distribution_entropy": 0.9841904792966663,
"eval_calibration/confidence_entropy": 0.47011778535507714,
"eval_calibration/coverage@0%": 0.15104166666666666,
"eval_calibration/coverage@1%": 0.15104166666666666,
"eval_calibration/coverage@10%": 0.390625,
"eval_calibration/coverage@15%": 0.640625,
"eval_calibration/coverage@20%": 0.8020833333333334,
"eval_calibration/coverage@25%": 0.8541666666666666,
"eval_calibration/coverage@30%": 0.90625,
"eval_calibration/coverage@5%": 0.15104166666666666,
"eval_calibration/ece": 0.20879,
"eval_calibration/mean_confidence": 0.6107004166666666,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 2728.0,
"eval_completions/max_terminated_length": 2728.0,
"eval_completions/mean_length": 1002.8826700846354,
"eval_completions/mean_terminated_length": 1002.8826700846354,
"eval_completions/min_length": 291.3333333333333,
"eval_completions/min_terminated_length": 291.3333333333333,
"eval_loss": 0.0,
"eval_num_tokens": 1865124220.0,
"eval_reward": 0.9274651606877645,
"eval_reward_std": 0.23160785188277563,
"eval_rewards/accuracy_reward": 0.6901041666666666,
"eval_rewards/brier_reward": 0.8312720060348511,
"eval_rewards/confidence_uniqueness_reward": 0.8948567608992258,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_coverage_0": 0.0433860938064754,
"eval_rewards/frontier_coverage_1": 0.0433860938064754,
"eval_rewards/frontier_coverage_10": 0.04865478724241257,
"eval_rewards/frontier_coverage_15": 0.09748644630114238,
"eval_rewards/frontier_coverage_20": 0.16351032753785452,
"eval_rewards/frontier_coverage_25": 0.24551946173111597,
"eval_rewards/frontier_coverage_5": 0.04338224340851108,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 158.4425,
"eval_samples_per_second": 6.311,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4116210887829463,
"eval_signal/accuracy_reward/group_std_mean": 0.4593142320712407,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.892851193745931,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20581054439147314,
"eval_signal/advantage_abs_mean": 0.8790315886338552,
"eval_signal/advantage_pre_scale_abs_mean": 0.20490500579277673,
"eval_signal/advantage_pre_scale_std": 0.22919744749863943,
"eval_signal/advantage_std": 0.9863952895005544,
"eval_signal/brier_reward/centered_abs_mean": 0.1591823771595955,
"eval_signal/brier_reward/group_std_mean": 0.21574609478314719,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06901257298886776,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01591823762282729,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0410291887819767,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04890784186621507,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01784918162350853,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00410291882387052,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.25617842624584836,
"eval_signal/frontier_coverage_0/group_std_mean": 0.35960617661476135,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01589485149209698,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036633514488736787,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.25617842624584836,
"eval_signal/frontier_coverage_1/group_std_mean": 0.35960617661476135,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01589485149209698,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036633514488736787,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.07833195229371388,
"eval_signal/frontier_coverage_10/group_std_mean": 0.10744242370128632,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004864773480221629,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011201469460502267,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.11549535890420277,
"eval_signal/frontier_coverage_15/group_std_mean": 0.14778297146161398,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007180764805525541,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016515836274872224,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.19812731196482977,
"eval_signal/frontier_coverage_20/group_std_mean": 0.2471755420168241,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012313599543025097,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002833220448034505,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.29754012326399487,
"eval_signal/frontier_coverage_25/group_std_mean": 0.3664591312408447,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01848344939450423,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004254823783412576,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.25584504504998523,
"eval_signal/frontier_coverage_5/group_std_mean": 0.35919003188610077,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01587420531238119,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036585842026397586,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.038,
"step": 700
},
{
"epoch": 1.6815914801064986,
"step": 700,
"train_probe_calibration/aurc": 0.07127535899437333,
"train_probe_calibration/batch_distribution_entropy": 0.9124863581279891,
"train_probe_calibration/buffer_distribution_entropy": 0.9839733115888903,
"train_probe_calibration/confidence_entropy": 0.5049382609896232,
"train_probe_calibration/coverage@0%": 0.4270833333333333,
"train_probe_calibration/coverage@1%": 0.4270833333333333,
"train_probe_calibration/coverage@10%": 0.7552083333333334,
"train_probe_calibration/coverage@15%": 0.8489583333333334,
"train_probe_calibration/coverage@20%": 0.9427083333333334,
"train_probe_calibration/coverage@25%": 0.9791666666666666,
"train_probe_calibration/coverage@30%": 1.0,
"train_probe_calibration/coverage@5%": 0.515625,
"train_probe_calibration/ece": 0.22284895833333332,
"train_probe_calibration/mean_confidence": 0.6038437499999999,
"train_probe_completions/clipped_ratio": 0.0,
"train_probe_completions/max_length": 2894.3333333333335,
"train_probe_completions/max_terminated_length": 2894.3333333333335,
"train_probe_completions/mean_length": 1015.2335306803385,
"train_probe_completions/mean_terminated_length": 1015.2335306803385,
"train_probe_completions/min_length": 299.6666666666667,
"train_probe_completions/min_terminated_length": 299.6666666666667,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 1865124220.0,
"train_probe_reward": 0.96530615290006,
"train_probe_reward_std": 0.2089141458272934,
"train_probe_rewards/accuracy_reward": 0.7664930621782938,
"train_probe_rewards/brier_reward": 0.8375353117783865,
"train_probe_rewards/confidence_uniqueness_reward": 0.895941843589147,
"train_probe_rewards/format_reward": 1.0,
"train_probe_rewards/frontier_coverage_0": -0.006343296496197581,
"train_probe_rewards/frontier_coverage_1": -0.006343296496197581,
"train_probe_rewards/frontier_coverage_10": 0.04194001046319803,
"train_probe_rewards/frontier_coverage_15": 0.10766408095757167,
"train_probe_rewards/frontier_coverage_20": 0.18949769685665765,
"train_probe_rewards/frontier_coverage_25": 0.28906770547231037,
"train_probe_rewards/frontier_coverage_5": -0.0062605949739615125,
"train_probe_rewards/frontier_entropy_batch_reward": -1.0,
"train_probe_runtime": 150.7247,
"train_probe_samples_per_second": 6.635,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.3511827240387599,
"train_probe_signal/accuracy_reward/group_std_mean": 0.4241461455821991,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8484959204991659,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.17559136201937994,
"train_probe_signal/advantage_abs_mean": 0.8118196030457815,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.17097164442141852,
"train_probe_signal/advantage_pre_scale_std": 0.20714367926120758,
"train_probe_signal/advantage_std": 0.9863471786181132,
"train_probe_signal/brier_reward/centered_abs_mean": 0.14898951599995294,
"train_probe_signal/brier_reward/group_std_mean": 0.20260730385780334,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07194508115450542,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.014898952251921097,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.042310927684108414,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.04979841659466425,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020431222083667915,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0042310926752785845,
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
"train_probe_signal/format_reward/group_std_mean": 0.0,
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.23911839226881662,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.3518788516521454,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.016536445822566748,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003419393013852338,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.23911839226881662,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.3518788516521454,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.016536445822566748,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003419393013852338,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.07297215610742569,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.1017376904686292,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0050443368187795086,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010435017951143284,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.10844651361306508,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.1344559801121553,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007490781756738822,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00155078514944762,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.18234311292568842,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.2210288643836975,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012593142222613096,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002607506583444774,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.2681734710931778,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.3245113790035248,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01852063648402691,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00383488069443653,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.23879685004552206,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.35146549840768176,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.016514215618371964,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034147949190810323,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"train_probe_steps_per_second": 0.04
},
{
"calibration/aurc": 0.11227090677110904,
"calibration/batch_distribution_entropy": 0.948779230760648,
"calibration/buffer_distribution_entropy": 0.9835474650215609,
"calibration/confidence_entropy": 0.49129050024545345,
"calibration/coverage@0%": 0.040625,
"calibration/coverage@1%": 0.040625,
"calibration/coverage@10%": 0.5630208333333333,
"calibration/coverage@15%": 0.7036458333333334,
"calibration/coverage@20%": 0.8458333333333332,
"calibration/coverage@25%": 0.909375,
"calibration/coverage@30%": 0.9583333333333333,
"calibration/coverage@5%": 0.3458333333333333,
"calibration/ece": 0.18081584374999998,
"calibration/mean_confidence": 0.6116592604166666,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0004340277777777901,
"completions/max_length": 3177.6,
"completions/max_terminated_length": 3177.6,
"completions/mean_length": 986.2343017578125,
"completions/mean_terminated_length": 986.6416015625,
"completions/min_length": 158.2,
"completions/min_terminated_length": 269.6,
"epoch": 1.6935913301083736,
"grad_norm": 0.0028598185162991285,
"learning_rate": 2.013221153846154e-06,
"loss": 0.0069,
"num_tokens": 1879571655.0,
"reward": 1.019974374771118,
"reward_std": 0.09644923657178879,
"rewards/accuracy_reward": 0.7236979126930236,
"rewards/brier_reward": 0.8357795834541321,
"rewards/confidence_uniqueness_reward": 0.9502484321594238,
"rewards/format_reward": 0.9995659708976745,
"rewards/frontier_coverage_0": 0.02306669168174267,
"rewards/frontier_coverage_1": 0.02306669168174267,
"rewards/frontier_coverage_10": 0.04798247441649437,
"rewards/frontier_coverage_15": 0.10166206508874893,
"rewards/frontier_coverage_20": 0.17447640299797057,
"rewards/frontier_coverage_25": 0.2648200333118439,
"rewards/frontier_coverage_5": 0.02308344580233097,
"rewards/frontier_entropy_batch_reward": -0.2967200607061386,
"signal/accuracy_reward/centered_abs_mean": 0.11241862177848816,
"signal/accuracy_reward/group_std_mean": 0.15345828533172606,
"signal/accuracy_reward/group_zero_std_frac": 0.5472222208976746,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9400637745857239,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05620931088924408,
"signal/advantage_abs_mean": 0.763127076625824,
"signal/advantage_pre_scale_abs_mean": 0.0733158528804779,
"signal/advantage_pre_scale_std": 0.11780442744493484,
"signal/advantage_std": 0.9828173637390136,
"signal/brier_reward/centered_abs_mean": 0.09998511821031571,
"signal/brier_reward/group_std_mean": 0.130735120177269,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16749710142612456,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009998511895537377,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014156256802380085,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018865460343658925,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02363467663526535,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014156257035210728,
"signal/format_reward/centered_abs_mean": 0.0008300781133584678,
"signal/format_reward/group_std_mean": 0.0021562909707427023,
"signal/format_reward/group_zero_std_frac": 0.9888888835906983,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006732623372226953,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004150390566792339,
"signal/frontier_coverage_0/centered_abs_mean": 0.1440996915102005,
"signal/frontier_coverage_0/group_std_mean": 0.19001898765563965,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0345321387052536,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020606255857273936,
"signal/frontier_coverage_1/centered_abs_mean": 0.1440996915102005,
"signal/frontier_coverage_1/group_std_mean": 0.19001898765563965,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0345321387052536,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020606255857273936,
"signal/frontier_coverage_10/centered_abs_mean": 0.056343245506286624,
"signal/frontier_coverage_10/group_std_mean": 0.07241087406873703,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013484322652220725,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008057083818130195,
"signal/frontier_coverage_15/centered_abs_mean": 0.07188424617052078,
"signal/frontier_coverage_15/group_std_mean": 0.08956207633018494,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017177759483456612,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001027944718953222,
"signal/frontier_coverage_20/centered_abs_mean": 0.10164368897676468,
"signal/frontier_coverage_20/group_std_mean": 0.127097025513649,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024291865527629852,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001453504804521799,
"signal/frontier_coverage_25/centered_abs_mean": 0.13992716670036315,
"signal/frontier_coverage_25/group_std_mean": 0.17583813071250914,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.033450279384851456,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020009585423395038,
"signal/frontier_coverage_5/centered_abs_mean": 0.1439125806093216,
"signal/frontier_coverage_5/group_std_mean": 0.18978277444839478,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03448736071586609,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020579498959705234,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32811395525932313,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3939893305301666,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5487881243228913,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032811397686600685,
"step": 705
},
{
"calibration/aurc": 0.11177033556535712,
"calibration/batch_distribution_entropy": 0.976128231345325,
"calibration/buffer_distribution_entropy": 0.9831304894900681,
"calibration/confidence_entropy": 0.4772357399211125,
"calibration/coverage@0%": 0.10104166666666667,
"calibration/coverage@1%": 0.2026041666666667,
"calibration/coverage@10%": 0.6203125,
"calibration/coverage@15%": 0.7109375,
"calibration/coverage@20%": 0.7885416666666667,
"calibration/coverage@25%": 0.8604166666666668,
"calibration/coverage@30%": 0.909375,
"calibration/coverage@5%": 0.4421875,
"calibration/ece": 0.1605678541666667,
"calibration/mean_confidence": 0.518036125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0013020833333333482,
"completions/max_length": 3288.6,
"completions/max_terminated_length": 3288.6,
"completions/mean_length": 952.0667602539063,
"completions/mean_terminated_length": 953.1977416992188,
"completions/min_length": 123.0,
"completions/min_terminated_length": 214.0,
"epoch": 1.7055911801102486,
"grad_norm": 0.0027350601740181446,
"learning_rate": 1.983173076923077e-06,
"loss": 0.0059,
"num_tokens": 1893672808.0,
"reward": 1.0128086686134339,
"reward_std": 0.09612514525651931,
"rewards/accuracy_reward": 0.7134548664093018,
"rewards/brier_reward": 0.8288593173027039,
"rewards/confidence_uniqueness_reward": 0.9479322075843811,
"rewards/format_reward": 0.9986979246139527,
"rewards/frontier_coverage_0": 0.032473142445087436,
"rewards/frontier_coverage_1": 0.032473142445087436,
"rewards/frontier_coverage_10": 0.05002719163894653,
"rewards/frontier_coverage_15": 0.09773223251104354,
"rewards/frontier_coverage_20": 0.1664465069770813,
"rewards/frontier_coverage_25": 0.25369060337543486,
"rewards/frontier_coverage_5": 0.032423215731978414,
"rewards/frontier_entropy_batch_reward": -0.3046021282672882,
"signal/accuracy_reward/centered_abs_mean": 0.11392686665058135,
"signal/accuracy_reward/group_std_mean": 0.15370176434516908,
"signal/accuracy_reward/group_zero_std_frac": 0.5527777910232544,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9989817976951599,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05696343332529068,
"signal/advantage_abs_mean": 0.7665924191474914,
"signal/advantage_pre_scale_abs_mean": 0.07367298156023025,
"signal/advantage_pre_scale_std": 0.11973680555820465,
"signal/advantage_std": 0.9827433586120605,
"signal/brier_reward/centered_abs_mean": 0.10304120779037476,
"signal/brier_reward/group_std_mean": 0.13187731206417083,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1804224044084549,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010304121114313603,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015719205886125565,
"signal/confidence_uniqueness_reward/group_std_mean": 0.020367484539747238,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0275027796626091,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015719205373898148,
"signal/format_reward/centered_abs_mean": 0.00201280377805233,
"signal/format_reward/group_std_mean": 0.0031654864549636843,
"signal/format_reward/group_zero_std_frac": 0.9888888835906983,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.017516496032476424,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001006401889026165,
"signal/frontier_coverage_0/centered_abs_mean": 0.15068837106227875,
"signal/frontier_coverage_0/group_std_mean": 0.1956734299659729,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0377209234982729,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021548437187448146,
"signal/frontier_coverage_1/centered_abs_mean": 0.15068837106227875,
"signal/frontier_coverage_1/group_std_mean": 0.1956734299659729,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0377209234982729,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021548437187448146,
"signal/frontier_coverage_10/centered_abs_mean": 0.0584891103208065,
"signal/frontier_coverage_10/group_std_mean": 0.07460076361894608,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014630392752587795,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008363942615687847,
"signal/frontier_coverage_15/centered_abs_mean": 0.0715473860502243,
"signal/frontier_coverage_15/group_std_mean": 0.08882242441177368,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017906750738620757,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001023127674125135,
"signal/frontier_coverage_20/centered_abs_mean": 0.09926576763391495,
"signal/frontier_coverage_20/group_std_mean": 0.12329905033111573,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024845069274306297,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014195004710927605,
"signal/frontier_coverage_25/centered_abs_mean": 0.13574471473693847,
"signal/frontier_coverage_25/group_std_mean": 0.16967822313308717,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03398062214255333,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019411493558436632,
"signal/frontier_coverage_5/centered_abs_mean": 0.15050061643123627,
"signal/frontier_coverage_5/group_std_mean": 0.19543037116527556,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03767400272190571,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021521587623283267,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31716119647026064,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.386073637008667,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5541856288909912,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03171611912548542,
"step": 710
},
{
"calibration/aurc": 0.16409598680310306,
"calibration/batch_distribution_entropy": 0.9560436108340845,
"calibration/buffer_distribution_entropy": 0.9837300379537656,
"calibration/confidence_entropy": 0.4912980836544441,
"calibration/coverage@0%": 0.1578125,
"calibration/coverage@1%": 0.2390625,
"calibration/coverage@10%": 0.4213541666666667,
"calibration/coverage@15%": 0.4598958333333334,
"calibration/coverage@20%": 0.6744791666666667,
"calibration/coverage@25%": 0.7630208333333333,
"calibration/coverage@30%": 0.83125,
"calibration/coverage@5%": 0.3338541666666667,
"calibration/ece": 0.19649028546559952,
"calibration/mean_confidence": 0.5740259916177338,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00017361111111111605,
"completions/max_length": 3283.2,
"completions/max_terminated_length": 3283.2,
"completions/mean_length": 951.1273559570312,
"completions/mean_terminated_length": 951.2944213867188,
"completions/min_length": 156.4,
"completions/min_terminated_length": 273.4,
"epoch": 1.7175910301121236,
"grad_norm": 0.002813108032569289,
"learning_rate": 1.953125e-06,
"loss": 0.0067,
"num_tokens": 1907777379.0,
"reward": 1.0050580382347107,
"reward_std": 0.09790399223566056,
"rewards/accuracy_reward": 0.7006076455116272,
"rewards/brier_reward": 0.8105961203575134,
"rewards/confidence_uniqueness_reward": 0.9501657247543335,
"rewards/format_reward": 0.9998263835906982,
"rewards/frontier_coverage_0": 0.017787472996860742,
"rewards/frontier_coverage_1": 0.017787472996860742,
"rewards/frontier_coverage_10": 0.039942527562379836,
"rewards/frontier_coverage_15": 0.08797252029180527,
"rewards/frontier_coverage_20": 0.15163930654525756,
"rewards/frontier_coverage_25": 0.22993890941143036,
"rewards/frontier_coverage_5": 0.01784335859119892,
"rewards/frontier_entropy_batch_reward": -0.2928480267524719,
"signal/accuracy_reward/centered_abs_mean": 0.1241156667470932,
"signal/accuracy_reward/group_std_mean": 0.16047287881374359,
"signal/accuracy_reward/group_zero_std_frac": 0.5527777791023254,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0350728273391723,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0620578333735466,
"signal/advantage_abs_mean": 0.7821515083312989,
"signal/advantage_pre_scale_abs_mean": 0.07757937014102936,
"signal/advantage_pre_scale_std": 0.1194337010383606,
"signal/advantage_std": 0.9828120470046997,
"signal/brier_reward/centered_abs_mean": 0.10871105641126633,
"signal/brier_reward/group_std_mean": 0.13986618518829347,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18278415501117706,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010871105827391148,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01440898347645998,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018457892164587975,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024234963953495024,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014408984687179327,
"signal/format_reward/centered_abs_mean": 0.0003363715251907706,
"signal/format_reward/group_std_mean": 0.0009820926934480667,
"signal/format_reward/group_zero_std_frac": 0.9944444417953491,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.002808227576315403,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0001681857625953853,
"signal/frontier_coverage_0/centered_abs_mean": 0.168141171336174,
"signal/frontier_coverage_0/group_std_mean": 0.21634862720966339,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04029642269015312,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002404418867081404,
"signal/frontier_coverage_1/centered_abs_mean": 0.168141171336174,
"signal/frontier_coverage_1/group_std_mean": 0.21634862720966339,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04029642269015312,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002404418867081404,
"signal/frontier_coverage_10/centered_abs_mean": 0.060566478222608564,
"signal/frontier_coverage_10/group_std_mean": 0.07745107561349869,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014549786597490311,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008661005995236337,
"signal/frontier_coverage_15/centered_abs_mean": 0.06986679509282112,
"signal/frontier_coverage_15/group_std_mean": 0.0866427794098854,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016798367351293565,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009990951512008905,
"signal/frontier_coverage_20/centered_abs_mean": 0.09684419780969619,
"signal/frontier_coverage_20/group_std_mean": 0.12026553452014924,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023264965415000914,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013848720118403435,
"signal/frontier_coverage_25/centered_abs_mean": 0.13374279588460922,
"signal/frontier_coverage_25/group_std_mean": 0.1665105402469635,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03211365006864071,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019125219201669096,
"signal/frontier_coverage_5/centered_abs_mean": 0.1679401069879532,
"signal/frontier_coverage_5/group_std_mean": 0.21610071659088134,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04024786874651909,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002401543501764536,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32422704696655275,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3925470232963562,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5443297028541565,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03242270387709141,
"step": 715
},
{
"calibration/aurc": 0.11311558304503566,
"calibration/batch_distribution_entropy": 0.9469167067730021,
"calibration/buffer_distribution_entropy": 0.9843809426976993,
"calibration/confidence_entropy": 0.4714342502997916,
"calibration/coverage@0%": 0.08125,
"calibration/coverage@1%": 0.15364583333333331,
"calibration/coverage@10%": 0.59375,
"calibration/coverage@15%": 0.6770833333333334,
"calibration/coverage@20%": 0.7578125,
"calibration/coverage@25%": 0.8171875,
"calibration/coverage@30%": 0.9010416666666667,
"calibration/coverage@5%": 0.5281250000000001,
"calibration/ece": 0.15241121949599107,
"calibration/mean_confidence": 0.6112474159206757,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0013020833333333482,
"completions/max_length": 3388.2,
"completions/max_terminated_length": 3388.2,
"completions/mean_length": 1030.08037109375,
"completions/mean_terminated_length": 1031.532958984375,
"completions/min_length": 150.0,
"completions/min_terminated_length": 324.8,
"epoch": 1.7295908801139985,
"grad_norm": 0.0027848011814057827,
"learning_rate": 1.9230769230769234e-06,
"loss": 0.0091,
"num_tokens": 1922750273.0,
"reward": 1.0154060482978822,
"reward_std": 0.10402875542640685,
"rewards/accuracy_reward": 0.715625,
"rewards/brier_reward": 0.8296850442886352,
"rewards/confidence_uniqueness_reward": 0.9488834977149964,
"rewards/format_reward": 0.9986979126930237,
"rewards/frontier_coverage_0": 0.026306459889747204,
"rewards/frontier_coverage_1": 0.026306459889747204,
"rewards/frontier_coverage_10": 0.04776106104254722,
"rewards/frontier_coverage_15": 0.10400652289390563,
"rewards/frontier_coverage_20": 0.1774923324584961,
"rewards/frontier_coverage_25": 0.2654254615306854,
"rewards/frontier_coverage_5": 0.02636495413025841,
"rewards/frontier_entropy_batch_reward": -0.29245676696300504,
"signal/accuracy_reward/centered_abs_mean": 0.12931857705116273,
"signal/accuracy_reward/group_std_mean": 0.17324694395065307,
"signal/accuracy_reward/group_zero_std_frac": 0.4944444537162781,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.016154146194458,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06465928852558137,
"signal/advantage_abs_mean": 0.768020224571228,
"signal/advantage_pre_scale_abs_mean": 0.08030048310756684,
"signal/advantage_pre_scale_std": 0.1255600705742836,
"signal/advantage_std": 0.9828881740570068,
"signal/brier_reward/centered_abs_mean": 0.1074549213051796,
"signal/brier_reward/group_std_mean": 0.13885989040136337,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1708405613899231,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01074549201875925,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015975476428866386,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0203463114798069,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02538231648504734,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015975477173924447,
"signal/format_reward/centered_abs_mean": 0.0019151475164107979,
"signal/format_reward/group_std_mean": 0.0030056854709982874,
"signal/format_reward/group_zero_std_frac": 0.9888888835906983,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01378869116306305,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0009575737582053989,
"signal/frontier_coverage_0/centered_abs_mean": 0.15936702787876128,
"signal/frontier_coverage_0/group_std_mean": 0.20675169229507445,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03624581061303615,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002278948575258255,
"signal/frontier_coverage_1/centered_abs_mean": 0.15936702787876128,
"signal/frontier_coverage_1/group_std_mean": 0.20675169229507445,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03624581061303615,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002278948575258255,
"signal/frontier_coverage_10/centered_abs_mean": 0.06151966378092766,
"signal/frontier_coverage_10/group_std_mean": 0.07832264006137848,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014088746346533298,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008797311922535301,
"signal/frontier_coverage_15/centered_abs_mean": 0.07503360658884048,
"signal/frontier_coverage_15/group_std_mean": 0.09373433589935302,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0172480970621109,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010729805566370488,
"signal/frontier_coverage_20/centered_abs_mean": 0.10515500009059905,
"signal/frontier_coverage_20/group_std_mean": 0.13239262700080873,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02413479909300804,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001503716385923326,
"signal/frontier_coverage_25/centered_abs_mean": 0.14459011554718018,
"signal/frontier_coverage_25/group_std_mean": 0.1828522264957428,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.033116637542843816,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020676386076956986,
"signal/frontier_coverage_5/centered_abs_mean": 0.15917536318302156,
"signal/frontier_coverage_5/group_std_mean": 0.20650528967380524,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03620238043367863,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002276207786053419,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33016577959060667,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3960925698280334,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5278231203556061,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0330165795981884,
"step": 720
},
{
"calibration/aurc": 0.08872587034216756,
"calibration/batch_distribution_entropy": 0.9598728659210309,
"calibration/buffer_distribution_entropy": 0.9835396752206689,
"calibration/confidence_entropy": 0.4633211907048679,
"calibration/coverage@0%": 0.09427083333333333,
"calibration/coverage@1%": 0.2114583333333333,
"calibration/coverage@10%": 0.6203125,
"calibration/coverage@15%": 0.8171875,
"calibration/coverage@20%": 0.9109375,
"calibration/coverage@25%": 0.9598958333333332,
"calibration/coverage@30%": 0.996875,
"calibration/coverage@5%": 0.415625,
"calibration/ece": 0.18747993802083335,
"calibration/mean_confidence": 0.5860404057291666,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0009548611111111161,
"completions/max_length": 3613.4,
"completions/max_terminated_length": 3613.4,
"completions/mean_length": 1067.2394775390626,
"completions/mean_terminated_length": 1068.278662109375,
"completions/min_length": 37.0,
"completions/min_terminated_length": 250.6,
"epoch": 1.7415907301158735,
"grad_norm": 0.002794325351715088,
"learning_rate": 1.8930288461538463e-06,
"loss": 0.006,
"num_tokens": 1938145768.0,
"reward": 1.0198950052261353,
"reward_std": 0.1042319193482399,
"rewards/accuracy_reward": 0.7254340291023255,
"rewards/brier_reward": 0.8326017618179321,
"rewards/confidence_uniqueness_reward": 0.9485205292701722,
"rewards/format_reward": 0.9990451335906982,
"rewards/frontier_coverage_0": 0.022891762666404247,
"rewards/frontier_coverage_1": 0.022891762666404247,
"rewards/frontier_coverage_10": 0.048515988141298295,
"rewards/frontier_coverage_15": 0.10812882035970688,
"rewards/frontier_coverage_20": 0.18613292574882506,
"rewards/frontier_coverage_25": 0.2780951738357544,
"rewards/frontier_coverage_5": 0.022937561757862567,
"rewards/frontier_entropy_batch_reward": -0.30318026542663573,
"signal/accuracy_reward/centered_abs_mean": 0.12851019948720932,
"signal/accuracy_reward/group_std_mean": 0.16943923532962799,
"signal/accuracy_reward/group_zero_std_frac": 0.5138889014720917,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0418495416641236,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06425509974360466,
"signal/advantage_abs_mean": 0.7732895016670227,
"signal/advantage_pre_scale_abs_mean": 0.08091269582509994,
"signal/advantage_pre_scale_std": 0.1275203213095665,
"signal/advantage_std": 0.9828638076782227,
"signal/brier_reward/centered_abs_mean": 0.11142251789569854,
"signal/brier_reward/group_std_mean": 0.14267317950725555,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.180518040060997,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011142251826822758,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.016142511367797853,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02136380970478058,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026282599568367003,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016142510809004306,
"signal/format_reward/centered_abs_mean": 0.0017523871501907706,
"signal/format_reward/group_std_mean": 0.00372017240151763,
"signal/format_reward/group_zero_std_frac": 0.9833333253860473,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013838812150061131,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008761935750953853,
"signal/frontier_coverage_0/centered_abs_mean": 0.1558055818080902,
"signal/frontier_coverage_0/group_std_mean": 0.2006031185388565,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03602770790457725,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022280198289081456,
"signal/frontier_coverage_1/centered_abs_mean": 0.1558055818080902,
"signal/frontier_coverage_1/group_std_mean": 0.2006031185388565,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03602770790457725,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022280198289081456,
"signal/frontier_coverage_10/centered_abs_mean": 0.06283498480916024,
"signal/frontier_coverage_10/group_std_mean": 0.07953204363584518,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014551288262009621,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008985402528196573,
"signal/frontier_coverage_15/centered_abs_mean": 0.07994391769170761,
"signal/frontier_coverage_15/group_std_mean": 0.09906959235668182,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01861151084303856,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011431980179622768,
"signal/frontier_coverage_20/centered_abs_mean": 0.11396068185567856,
"signal/frontier_coverage_20/group_std_mean": 0.14176457226276398,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02656862176954746,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016296377405524253,
"signal/frontier_coverage_25/centered_abs_mean": 0.1566249281167984,
"signal/frontier_coverage_25/group_std_mean": 0.19541522860527039,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03651894517242908,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022397364489734175,
"signal/frontier_coverage_5/centered_abs_mean": 0.1556170642375946,
"signal/frontier_coverage_5/group_std_mean": 0.20036795735359192,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0359842661768198,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002225324069149792,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3297608971595764,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3948035776615143,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5373982965946198,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03297608904540539,
"step": 725
},
{
"calibration/aurc": 0.09752518005747307,
"calibration/batch_distribution_entropy": 0.9710835255316346,
"calibration/buffer_distribution_entropy": 0.9840270711706335,
"calibration/confidence_entropy": 0.47364203921770354,
"calibration/coverage@0%": 0.11885063098346389,
"calibration/coverage@1%": 0.11937146431679721,
"calibration/coverage@10%": 0.6858409486510009,
"calibration/coverage@15%": 0.7546344647519583,
"calibration/coverage@20%": 0.8083156549173195,
"calibration/coverage@25%": 0.8802083333333334,
"calibration/coverage@30%": 0.9385416666666666,
"calibration/coverage@5%": 0.5764101936466492,
"calibration/ece": 0.19226834575582027,
"calibration/mean_confidence": 0.5581783267311249,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0008680555555555802,
"completions/max_length": 3871.0,
"completions/max_terminated_length": 3871.0,
"completions/mean_length": 1115.5952270507812,
"completions/mean_terminated_length": 1116.6011596679687,
"completions/min_length": 0.0,
"completions/min_terminated_length": 261.4,
"epoch": 1.7535905801177485,
"grad_norm": 0.0026010002475231886,
"learning_rate": 1.8629807692307695e-06,
"loss": 0.0023,
"num_tokens": 1954102993.0,
"reward": 1.011343765258789,
"reward_std": 0.09056015610694886,
"rewards/accuracy_reward": 0.6978298544883728,
"rewards/brier_reward": 0.8352743029594422,
"rewards/confidence_uniqueness_reward": 0.9510626435279846,
"rewards/format_reward": 0.9991319417953491,
"rewards/frontier_coverage_0": 0.04333948716521263,
"rewards/frontier_coverage_1": 0.04333948716521263,
"rewards/frontier_coverage_10": 0.05298603735864162,
"rewards/frontier_coverage_15": 0.10530868023633957,
"rewards/frontier_coverage_20": 0.17865284085273742,
"rewards/frontier_coverage_25": 0.26539782881736756,
"rewards/frontier_coverage_5": 0.0433408307551872,
"rewards/frontier_entropy_batch_reward": -0.2624367654323578,
"signal/accuracy_reward/centered_abs_mean": 0.10245768278837204,
"signal/accuracy_reward/group_std_mean": 0.14452196955680846,
"signal/accuracy_reward/group_zero_std_frac": 0.5527777671813965,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8927505373954773,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05122884139418602,
"signal/advantage_abs_mean": 0.7562663912773132,
"signal/advantage_pre_scale_abs_mean": 0.0673715204000473,
"signal/advantage_pre_scale_std": 0.11014018654823303,
"signal/advantage_std": 0.9827435135841369,
"signal/brier_reward/centered_abs_mean": 0.09993450939655305,
"signal/brier_reward/group_std_mean": 0.13122970312833787,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17443340718746186,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009993451088666916,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0147280341014266,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0207037802785635,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025786501169204713,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014728034380823373,
"signal/format_reward/centered_abs_mean": 0.0016710069146938622,
"signal/format_reward/group_std_mean": 0.004611522704362869,
"signal/format_reward/group_zero_std_frac": 0.9749999880790711,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014536320511251689,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008355034573469311,
"signal/frontier_coverage_0/centered_abs_mean": 0.15731069147586824,
"signal/frontier_coverage_0/group_std_mean": 0.20592527985572814,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.039265432953834535,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002249542810022831,
"signal/frontier_coverage_1/centered_abs_mean": 0.15731069147586824,
"signal/frontier_coverage_1/group_std_mean": 0.20592527985572814,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.039265432953834535,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002249542810022831,
"signal/frontier_coverage_10/centered_abs_mean": 0.06539921313524247,
"signal/frontier_coverage_10/group_std_mean": 0.08309726417064667,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.016339878924190997,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009352087508887053,
"signal/frontier_coverage_15/centered_abs_mean": 0.06776027828454971,
"signal/frontier_coverage_15/group_std_mean": 0.08447953909635544,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016957908309996127,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009689719881862402,
"signal/frontier_coverage_20/centered_abs_mean": 0.09046011716127396,
"signal/frontier_coverage_20/group_std_mean": 0.11392348855733872,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022640842571854593,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012935796519741416,
"signal/frontier_coverage_25/centered_abs_mean": 0.12250153869390487,
"signal/frontier_coverage_25/group_std_mean": 0.15532831847667694,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030646225064992906,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017517718952149153,
"signal/frontier_coverage_5/centered_abs_mean": 0.15711890459060668,
"signal/frontier_coverage_5/group_std_mean": 0.20567532181739806,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03921758532524109,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002246800297871232,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3165683627128601,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3849177360534668,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5541592180728913,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03165683671832085,
"step": 730
},
{
"calibration/aurc": 0.17294304314154013,
"calibration/batch_distribution_entropy": 0.9683228508572282,
"calibration/buffer_distribution_entropy": 0.9847746833464687,
"calibration/confidence_entropy": 0.4747779032447707,
"calibration/coverage@0%": 0.13557033289817233,
"calibration/coverage@1%": 0.2148729873803307,
"calibration/coverage@10%": 0.35208197345517844,
"calibration/coverage@15%": 0.4916666666666667,
"calibration/coverage@20%": 0.6567708333333334,
"calibration/coverage@25%": 0.765625,
"calibration/coverage@30%": 0.8411458333333333,
"calibration/coverage@5%": 0.3020098999129678,
"calibration/ece": 0.19413389205287204,
"calibration/mean_confidence": 0.5501757289490862,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003038194444444464,
"completions/max_length": 3543.0,
"completions/max_terminated_length": 3543.0,
"completions/mean_length": 1157.7715087890624,
"completions/mean_terminated_length": 1161.328662109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 279.0,
"epoch": 1.7655904301196235,
"grad_norm": 0.0026367914397269487,
"learning_rate": 1.8329326923076924e-06,
"loss": 0.0072,
"num_tokens": 1970541641.0,
"reward": 1.005565345287323,
"reward_std": 0.10125984996557236,
"rewards/accuracy_reward": 0.6956597208976746,
"rewards/brier_reward": 0.8180548667907714,
"rewards/confidence_uniqueness_reward": 0.9483519077301026,
"rewards/format_reward": 0.996961796283722,
"rewards/frontier_coverage_0": 0.029731686878949403,
"rewards/frontier_coverage_1": 0.029731686878949403,
"rewards/frontier_coverage_10": 0.046731724962592126,
"rewards/frontier_coverage_15": 0.0951116681098938,
"rewards/frontier_coverage_20": 0.1619558095932007,
"rewards/frontier_coverage_25": 0.24104999899864196,
"rewards/frontier_coverage_5": 0.029736339347437024,
"rewards/frontier_entropy_batch_reward": -0.26453024744987486,
"signal/accuracy_reward/centered_abs_mean": 0.12727864384651183,
"signal/accuracy_reward/group_std_mean": 0.16613382697105408,
"signal/accuracy_reward/group_zero_std_frac": 0.5333333313465118,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0535690546035767,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06363932192325591,
"signal/advantage_abs_mean": 0.7730516195297241,
"signal/advantage_pre_scale_abs_mean": 0.07883715778589248,
"signal/advantage_pre_scale_std": 0.12539079785346985,
"signal/advantage_std": 0.9828304886817932,
"signal/brier_reward/centered_abs_mean": 0.10875847935676575,
"signal/brier_reward/group_std_mean": 0.14072711169719695,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18023832142353058,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010875848308205604,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01687628235667944,
"signal/confidence_uniqueness_reward/group_std_mean": 0.023477645963430403,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028091933578252792,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00168762831017375,
"signal/format_reward/centered_abs_mean": 0.0035319010145030915,
"signal/format_reward/group_std_mean": 0.006885326839983464,
"signal/format_reward/group_zero_std_frac": 0.9694444417953492,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.029567970614880323,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0017659505072515457,
"signal/frontier_coverage_0/centered_abs_mean": 0.17091023325920104,
"signal/frontier_coverage_0/group_std_mean": 0.22292305529117584,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04045567587018013,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024440162349492313,
"signal/frontier_coverage_1/centered_abs_mean": 0.17091023325920104,
"signal/frontier_coverage_1/group_std_mean": 0.22292305529117584,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04045567587018013,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024440162349492313,
"signal/frontier_coverage_10/centered_abs_mean": 0.06697189211845397,
"signal/frontier_coverage_10/group_std_mean": 0.08556520938873291,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015862343646585942,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009576980140991509,
"signal/frontier_coverage_15/centered_abs_mean": 0.07027349472045899,
"signal/frontier_coverage_15/group_std_mean": 0.08739349991083145,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016706252470612526,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010049110394902527,
"signal/frontier_coverage_20/centered_abs_mean": 0.09674167782068252,
"signal/frontier_coverage_20/group_std_mean": 0.1208344653248787,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02301716059446335,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013834059704095126,
"signal/frontier_coverage_25/centered_abs_mean": 0.13244094848632812,
"signal/frontier_coverage_25/group_std_mean": 0.16553622782230376,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03150580003857613,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001893905596807599,
"signal/frontier_coverage_5/centered_abs_mean": 0.1706935554742813,
"signal/frontier_coverage_5/group_std_mean": 0.22264962494373322,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.040404599905014035,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024409178644418717,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30670446157455444,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3765598952770233,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5103662192821503,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030670446157455445,
"step": 735
},
{
"calibration/aurc": 0.09985071608809112,
"calibration/batch_distribution_entropy": 0.9283240535799662,
"calibration/buffer_distribution_entropy": 0.9848685619549269,
"calibration/confidence_entropy": 0.486386545829507,
"calibration/coverage@0%": 0.043229166666666666,
"calibration/coverage@1%": 0.09687499999999999,
"calibration/coverage@10%": 0.6302083333333334,
"calibration/coverage@15%": 0.7114583333333333,
"calibration/coverage@20%": 0.9317708333333334,
"calibration/coverage@25%": 0.9541666666666668,
"calibration/coverage@30%": 0.9651041666666668,
"calibration/coverage@5%": 0.5005208333333334,
"calibration/ece": 0.18411820729166667,
"calibration/mean_confidence": 0.611181165625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00234375,
"completions/max_length": 3516.0,
"completions/max_terminated_length": 3516.0,
"completions/mean_length": 1135.1943115234376,
"completions/mean_terminated_length": 1137.8385498046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 298.8,
"epoch": 1.7775902801214984,
"grad_norm": 0.0029641035944223404,
"learning_rate": 1.8028846153846156e-06,
"loss": 0.0037,
"num_tokens": 1986678471.0,
"reward": 1.0208436727523804,
"reward_std": 0.09842554479837418,
"rewards/accuracy_reward": 0.7300347208976745,
"rewards/brier_reward": 0.8294078826904296,
"rewards/confidence_uniqueness_reward": 0.9471985816955566,
"rewards/format_reward": 0.99765625,
"rewards/frontier_coverage_0": 0.015275874444341753,
"rewards/frontier_coverage_1": 0.015275874444341753,
"rewards/frontier_coverage_10": 0.048988838493824,
"rewards/frontier_coverage_15": 0.10942500680685044,
"rewards/frontier_coverage_20": 0.1874927282333374,
"rewards/frontier_coverage_25": 0.27740028500556946,
"rewards/frontier_coverage_5": 0.015306396328378468,
"rewards/frontier_entropy_batch_reward": -0.3023156225681305,
"signal/accuracy_reward/centered_abs_mean": 0.11213107407093048,
"signal/accuracy_reward/group_std_mean": 0.14724079966545106,
"signal/accuracy_reward/group_zero_std_frac": 0.5861111044883728,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9607778906822204,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05606553703546524,
"signal/advantage_abs_mean": 0.7797276496887207,
"signal/advantage_pre_scale_abs_mean": 0.07560006380081177,
"signal/advantage_pre_scale_std": 0.12288236767053604,
"signal/advantage_std": 0.9827636361122132,
"signal/brier_reward/centered_abs_mean": 0.10327324271202087,
"signal/brier_reward/group_std_mean": 0.13345020413398742,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17794868648052214,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010327324084937573,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.018071673437952997,
"signal/confidence_uniqueness_reward/group_std_mean": 0.026784731075167656,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03143479339778423,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018071672879159451,
"signal/format_reward/centered_abs_mean": 0.004345703113358468,
"signal/format_reward/group_std_mean": 0.009928835928440094,
"signal/format_reward/group_zero_std_frac": 0.9527777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.03843059604987502,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.002172851556679234,
"signal/frontier_coverage_0/centered_abs_mean": 0.14959966242313386,
"signal/frontier_coverage_0/group_std_mean": 0.1921100914478302,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03681350834667683,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021392751950770617,
"signal/frontier_coverage_1/centered_abs_mean": 0.14959966242313386,
"signal/frontier_coverage_1/group_std_mean": 0.1921100914478302,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03681350834667683,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021392751950770617,
"signal/frontier_coverage_10/centered_abs_mean": 0.06143470034003258,
"signal/frontier_coverage_10/group_std_mean": 0.07756249755620956,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015148719027638435,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008785162470303475,
"signal/frontier_coverage_15/centered_abs_mean": 0.07495491802692414,
"signal/frontier_coverage_15/group_std_mean": 0.09279847294092178,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018493932485580445,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010718553327023982,
"signal/frontier_coverage_20/centered_abs_mean": 0.1055668607354164,
"signal/frontier_coverage_20/group_std_mean": 0.13145640641450881,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0260279543697834,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015096060931682586,
"signal/frontier_coverage_25/centered_abs_mean": 0.14254448264837266,
"signal/frontier_coverage_25/group_std_mean": 0.17810506224632264,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0351248387247324,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020383860683068633,
"signal/frontier_coverage_5/centered_abs_mean": 0.1494191914796829,
"signal/frontier_coverage_5/group_std_mean": 0.19188562035560608,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03676880933344364,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002136694313958287,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33148173689842225,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40152330994606017,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5731380939483642,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033148175477981566,
"step": 740
},
{
"calibration/aurc": 0.13612695574220837,
"calibration/batch_distribution_entropy": 0.9295601144978141,
"calibration/buffer_distribution_entropy": 0.9845439520338692,
"calibration/confidence_entropy": 0.4805375002432246,
"calibration/coverage@0%": 0.05367711053089643,
"calibration/coverage@1%": 0.10628127719756311,
"calibration/coverage@10%": 0.3990059290687554,
"calibration/coverage@15%": 0.6105648933855526,
"calibration/coverage@20%": 0.8020207789382072,
"calibration/coverage@25%": 0.8947916666666667,
"calibration/coverage@30%": 0.9427083333333333,
"calibration/coverage@5%": 0.27295066362053955,
"calibration/ece": 0.1459114410356832,
"calibration/mean_confidence": 0.5916419980417755,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.001128472222222232,
"completions/max_length": 3589.0,
"completions/max_terminated_length": 3589.0,
"completions/mean_length": 1115.8822509765625,
"completions/mean_terminated_length": 1117.1528076171876,
"completions/min_length": 0.0,
"completions/min_terminated_length": 297.4,
"epoch": 1.7895901301233734,
"grad_norm": 0.002868454437702894,
"learning_rate": 1.7728365384615387e-06,
"loss": 0.0094,
"num_tokens": 2002651930.0,
"reward": 1.0109495759010314,
"reward_std": 0.10073102861642838,
"rewards/accuracy_reward": 0.706249988079071,
"rewards/brier_reward": 0.8292598009109498,
"rewards/confidence_uniqueness_reward": 0.9492504000663757,
"rewards/format_reward": 0.9988715171813964,
"rewards/frontier_coverage_0": 0.02917664125561714,
"rewards/frontier_coverage_1": 0.02917664125561714,
"rewards/frontier_coverage_10": 0.04859147928655148,
"rewards/frontier_coverage_15": 0.1054500088095665,
"rewards/frontier_coverage_20": 0.17938823401927947,
"rewards/frontier_coverage_25": 0.26465229988098143,
"rewards/frontier_coverage_5": 0.02918265573680401,
"rewards/frontier_entropy_batch_reward": -0.2926656484603882,
"signal/accuracy_reward/centered_abs_mean": 0.12176649272441864,
"signal/accuracy_reward/group_std_mean": 0.16606390178203584,
"signal/accuracy_reward/group_zero_std_frac": 0.5055555701255798,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9774610042572022,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06088324636220932,
"signal/advantage_abs_mean": 0.7588282704353333,
"signal/advantage_pre_scale_abs_mean": 0.07568231076002122,
"signal/advantage_pre_scale_std": 0.12143019586801529,
"signal/advantage_std": 0.9828806400299073,
"signal/brier_reward/centered_abs_mean": 0.10687040835618973,
"signal/brier_reward/group_std_mean": 0.1374327689409256,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17205712497234343,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01068704053759575,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015384367294609546,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02148051857948303,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0247656911611557,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015384367434307933,
"signal/format_reward/centered_abs_mean": 0.0021104600746184587,
"signal/format_reward/group_std_mean": 0.005053839646279812,
"signal/format_reward/group_zero_std_frac": 0.9749999880790711,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016888655349612237,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0010552300373092294,
"signal/frontier_coverage_0/centered_abs_mean": 0.1621706336736679,
"signal/frontier_coverage_0/group_std_mean": 0.2084288328886032,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03729799836874008,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023190399631857874,
"signal/frontier_coverage_1/centered_abs_mean": 0.1621706336736679,
"signal/frontier_coverage_1/group_std_mean": 0.2084288328886032,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03729799836874008,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023190399631857874,
"signal/frontier_coverage_10/centered_abs_mean": 0.06481548249721528,
"signal/frontier_coverage_10/group_std_mean": 0.08101860135793686,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014912334084510804,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009268613997846842,
"signal/frontier_coverage_15/centered_abs_mean": 0.07552316784858704,
"signal/frontier_coverage_15/group_std_mean": 0.09370106011629105,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017398131638765336,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010799813084304334,
"signal/frontier_coverage_20/centered_abs_mean": 0.10281162559986115,
"signal/frontier_coverage_20/group_std_mean": 0.1287536635994911,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023685456439852714,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014702062588185072,
"signal/frontier_coverage_25/centered_abs_mean": 0.1379389226436615,
"signal/frontier_coverage_25/group_std_mean": 0.17409807741641997,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03176463283598423,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019725265679880976,
"signal/frontier_coverage_5/centered_abs_mean": 0.16200172007083893,
"signal/frontier_coverage_5/group_std_mean": 0.20821319222450257,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03725910410284996,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023166246246546507,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32640965580940245,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39191449284553526,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5255501866340637,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03264096602797508,
"step": 745
},
{
"calibration/aurc": 0.10590190080432076,
"calibration/batch_distribution_entropy": 0.9592523462908378,
"calibration/buffer_distribution_entropy": 0.9840037174285836,
"calibration/confidence_entropy": 0.5020861036359838,
"calibration/coverage@0%": 0.17800669060052218,
"calibration/coverage@1%": 0.25043108137510883,
"calibration/coverage@10%": 0.607252230200174,
"calibration/coverage@15%": 0.71875,
"calibration/coverage@20%": 0.8104166666666666,
"calibration/coverage@25%": 0.8848958333333332,
"calibration/coverage@30%": 0.9536458333333334,
"calibration/coverage@5%": 0.34107783942558745,
"calibration/ece": 0.1767842219253155,
"calibration/mean_confidence": 0.5643607527809509,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.001475694444444442,
"completions/max_length": 3785.4,
"completions/max_terminated_length": 3785.4,
"completions/mean_length": 1129.112158203125,
"completions/mean_terminated_length": 1130.78193359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 321.2,
"epoch": 1.8015899801252484,
"grad_norm": 0.0028191518504172564,
"learning_rate": 1.7427884615384616e-06,
"loss": 0.0067,
"num_tokens": 2018771430.0,
"reward": 1.01758474111557,
"reward_std": 0.10301121026277542,
"rewards/accuracy_reward": 0.7212673544883728,
"rewards/brier_reward": 0.8407497763633728,
"rewards/confidence_uniqueness_reward": 0.9472701191902161,
"rewards/format_reward": 0.998524296283722,
"rewards/frontier_coverage_0": 0.03290572431869805,
"rewards/frontier_coverage_1": 0.03290572431869805,
"rewards/frontier_coverage_10": 0.053498401492834094,
"rewards/frontier_coverage_15": 0.1104421705007553,
"rewards/frontier_coverage_20": 0.1861650675535202,
"rewards/frontier_coverage_25": 0.27491688430309297,
"rewards/frontier_coverage_5": 0.032948700070846826,
"rewards/frontier_entropy_batch_reward": -0.31463190019130705,
"signal/accuracy_reward/centered_abs_mean": 0.118994140625,
"signal/accuracy_reward/group_std_mean": 0.16333966851234435,
"signal/accuracy_reward/group_zero_std_frac": 0.5166666746139527,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9732692003250122,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0594970703125,
"signal/advantage_abs_mean": 0.7545445919036865,
"signal/advantage_pre_scale_abs_mean": 0.07700014412403107,
"signal/advantage_pre_scale_std": 0.1261327385902405,
"signal/advantage_std": 0.9828519821166992,
"signal/brier_reward/centered_abs_mean": 0.10183228701353073,
"signal/brier_reward/group_std_mean": 0.13478365838527678,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16659377813339232,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010183229111135006,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.016775419190526008,
"signal/confidence_uniqueness_reward/group_std_mean": 0.022979332879185677,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02745484858751297,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016775419702753426,
"signal/format_reward/centered_abs_mean": 0.00267469622194767,
"signal/format_reward/group_std_mean": 0.005587521148845554,
"signal/format_reward/group_zero_std_frac": 0.9749999880790711,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.02148791467770934,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001337348110973835,
"signal/frontier_coverage_0/centered_abs_mean": 0.13947168439626695,
"signal/frontier_coverage_0/group_std_mean": 0.18601751327514648,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.032587919384241104,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019944450818002225,
"signal/frontier_coverage_1/centered_abs_mean": 0.13947168439626695,
"signal/frontier_coverage_1/group_std_mean": 0.18601751327514648,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.032587919384241104,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019944450818002225,
"signal/frontier_coverage_10/centered_abs_mean": 0.05676937475800514,
"signal/frontier_coverage_10/group_std_mean": 0.07297334596514701,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013288442231714725,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008118020137771964,
"signal/frontier_coverage_15/centered_abs_mean": 0.07833496183156967,
"signal/frontier_coverage_15/group_std_mean": 0.09797212928533554,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01837916225194931,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001120189926587045,
"signal/frontier_coverage_20/centered_abs_mean": 0.11107763350009918,
"signal/frontier_coverage_20/group_std_mean": 0.13975699096918107,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02607056647539139,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001588410115800798,
"signal/frontier_coverage_25/centered_abs_mean": 0.1506495952606201,
"signal/frontier_coverage_25/group_std_mean": 0.19028232991695404,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0353517659008503,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002154289116151631,
"signal/frontier_coverage_5/centered_abs_mean": 0.13931359052658082,
"signal/frontier_coverage_5/group_std_mean": 0.18581429719924927,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0325510174036026,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00199218422640115,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32679831981658936,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3907285392284393,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5357029259204864,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032679833471775055,
"step": 750
},
{
"epoch": 1.8015899801252484,
"eval_calibration/aurc": 0.14554261102872565,
"eval_calibration/batch_distribution_entropy": 0.91718440536761,
"eval_calibration/buffer_distribution_entropy": 0.9837377091437123,
"eval_calibration/confidence_entropy": 0.48853757066041226,
"eval_calibration/coverage@0%": 0.18229166666666666,
"eval_calibration/coverage@1%": 0.18229166666666666,
"eval_calibration/coverage@10%": 0.5208333333333334,
"eval_calibration/coverage@15%": 0.671875,
"eval_calibration/coverage@20%": 0.7708333333333334,
"eval_calibration/coverage@25%": 0.8802083333333334,
"eval_calibration/coverage@30%": 0.921875,
"eval_calibration/coverage@5%": 0.2552083333333333,
"eval_calibration/ece": 0.21193911458333328,
"eval_calibration/mean_confidence": 0.5900025520833334,
"eval_completions/clipped_ratio": 0.0008680555555555617,
"eval_completions/max_length": 2721.3333333333335,
"eval_completions/max_terminated_length": 2721.3333333333335,
"eval_completions/mean_length": 1122.1944580078125,
"eval_completions/mean_terminated_length": 1123.159200032552,
"eval_completions/min_length": 336.8333333333333,
"eval_completions/min_terminated_length": 404.5,
"eval_loss": 0.0,
"eval_num_tokens": 2018771430.0,
"eval_reward": 0.9297488828500112,
"eval_reward_std": 0.23093928893407187,
"eval_rewards/accuracy_reward": 0.6935763955116272,
"eval_rewards/brier_reward": 0.8346609771251678,
"eval_rewards/confidence_uniqueness_reward": 0.8985180159409841,
"eval_rewards/format_reward": 0.9991319477558136,
"eval_rewards/frontier_coverage_0": 0.03844601707533002,
"eval_rewards/frontier_coverage_1": 0.03844601707533002,
"eval_rewards/frontier_coverage_10": 0.04998234659433365,
"eval_rewards/frontier_coverage_15": 0.10391578078269958,
"eval_rewards/frontier_coverage_20": 0.17418100436528525,
"eval_rewards/frontier_coverage_25": 0.25515559564034146,
"eval_rewards/frontier_coverage_5": 0.03847376614188155,
"eval_rewards/frontier_entropy_batch_reward": -0.9991319477558136,
"eval_runtime": 158.3973,
"eval_samples_per_second": 6.313,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4097764740387599,
"eval_signal/accuracy_reward/group_std_mean": 0.45795584718386334,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8910409013430277,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20488823701937994,
"eval_signal/advantage_abs_mean": 0.87361212571462,
"eval_signal/advantage_pre_scale_abs_mean": 0.202990693350633,
"eval_signal/advantage_pre_scale_std": 0.2284200762708982,
"eval_signal/advantage_std": 0.9863944252332052,
"eval_signal/brier_reward/centered_abs_mean": 0.1545459379752477,
"eval_signal/brier_reward/group_std_mean": 0.2079830765724182,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06722560152411461,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.015454593890657028,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04118582233786583,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05077329402168592,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.017912627197802067,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004118582389007012,
"eval_signal/format_reward/centered_abs_mean": 0.0016818575871487458,
"eval_signal/format_reward/group_std_mean": 0.0049104637776811915,
"eval_signal/format_reward/group_zero_std_frac": 0.9722222288449606,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0035001467913389206,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0008409287935743729,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.2608467886845271,
"eval_signal/frontier_coverage_0/group_std_mean": 0.36050594846407574,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.016234679458041985,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037301090002680817,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.2608467886845271,
"eval_signal/frontier_coverage_1/group_std_mean": 0.36050594846407574,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.016234679458041985,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037301090002680817,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.07517050827542941,
"eval_signal/frontier_coverage_10/group_std_mean": 0.10018332054217656,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004679729075481494,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010749382199719548,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.11889106159408887,
"eval_signal/frontier_coverage_15/group_std_mean": 0.1488691916068395,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007406616040195028,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001700142165645957,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.20220743864774704,
"eval_signal/frontier_coverage_20/group_std_mean": 0.2463468238711357,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01259286655113101,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002891566293934981,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.29517705241839093,
"eval_signal/frontier_coverage_25/group_std_mean": 0.35718540847301483,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018373853837450344,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004221031907945871,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2604764675100644,
"eval_signal/frontier_coverage_5/group_std_mean": 0.3600422491629918,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.016211653128266335,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037248135389139256,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0016818575871487458,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0049104637776811915,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9722222288449606,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0007000294669220845,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0001681857587148746,
"eval_steps_per_second": 0.038,
"step": 750
},
{
"epoch": 1.8015899801252484,
"step": 750,
"train_probe_calibration/aurc": 0.10360412267687767,
"train_probe_calibration/batch_distribution_entropy": 0.9012179728421429,
"train_probe_calibration/buffer_distribution_entropy": 0.9835511144626669,
"train_probe_calibration/confidence_entropy": 0.4963477415046183,
"train_probe_calibration/coverage@0%": 0.3125,
"train_probe_calibration/coverage@1%": 0.3125,
"train_probe_calibration/coverage@10%": 0.640625,
"train_probe_calibration/coverage@15%": 0.734375,
"train_probe_calibration/coverage@20%": 0.8541666666666666,
"train_probe_calibration/coverage@25%": 0.9635416666666666,
"train_probe_calibration/coverage@30%": 0.9895833333333334,
"train_probe_calibration/coverage@5%": 0.375,
"train_probe_calibration/ece": 0.2142140625,
"train_probe_calibration/mean_confidence": 0.5926161458333333,
"train_probe_completions/clipped_ratio": 0.0008680555555555617,
"train_probe_completions/max_length": 3456.1666666666665,
"train_probe_completions/max_terminated_length": 3456.1666666666665,
"train_probe_completions/mean_length": 1130.0889282226562,
"train_probe_completions/mean_terminated_length": 1131.0578002929688,
"train_probe_completions/min_length": 281.3333333333333,
"train_probe_completions/min_terminated_length": 343.1666666666667,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 2018771430.0,
"train_probe_reward": 0.95806951324145,
"train_probe_reward_std": 0.21482898046573004,
"train_probe_rewards/accuracy_reward": 0.7499999900658926,
"train_probe_rewards/brier_reward": 0.8428874909877777,
"train_probe_rewards/confidence_uniqueness_reward": 0.8931871751944224,
"train_probe_rewards/format_reward": 0.9991319477558136,
"train_probe_rewards/frontier_coverage_0": 0.012033387494739145,
"train_probe_rewards/frontier_coverage_1": 0.012033387494739145,
"train_probe_rewards/frontier_coverage_10": 0.0505746491253376,
"train_probe_rewards/frontier_coverage_15": 0.11439343293507893,
"train_probe_rewards/frontier_coverage_20": 0.19532609979311624,
"train_probe_rewards/frontier_coverage_25": 0.28949019064505893,
"train_probe_rewards/frontier_coverage_5": 0.012110456203420958,
"train_probe_rewards/frontier_entropy_batch_reward": -0.9991319477558136,
"train_probe_runtime": 188.5693,
"train_probe_samples_per_second": 5.303,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.3663194427887599,
"train_probe_signal/accuracy_reward/group_std_mean": 0.43311170240243274,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8608072102069855,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18315972139437994,
"train_probe_signal/advantage_abs_mean": 0.8269814153512319,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.17903072386980057,
"train_probe_signal/advantage_pre_scale_std": 0.2130366489291191,
"train_probe_signal/advantage_std": 0.9863598346710205,
"train_probe_signal/brier_reward/centered_abs_mean": 0.14471079657475153,
"train_probe_signal/brier_reward/group_std_mean": 0.19478769848744074,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06802343266705672,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.014471080464621386,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04179748644431432,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.05130287570257982,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019671458440522354,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004179748706519604,
"train_probe_signal/format_reward/centered_abs_mean": 0.0016818575871487458,
"train_probe_signal/format_reward/group_std_mean": 0.0049104637776811915,
"train_probe_signal/format_reward/group_zero_std_frac": 0.9722222288449606,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0038858645906051,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0008409287935743729,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.2487302447358767,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.3555862208207448,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01672346827884515,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0035568424112473926,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2487302447358767,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.3555862208207448,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01672346827884515,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035568424112473926,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.07384044552842776,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.09972128023703893,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004964815763135751,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001055918352600808,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.11256096636255582,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.13923830290635428,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007571271853521466,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016096217441372573,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.18888175984223685,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.22895304610331854,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012701889500021935,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002701009080434839,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.2721845557292302,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.32942800720532733,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0183031614869833,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038922393772130213,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.24837070206801096,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.3551288843154907,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.016699314738313358,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035517010061691203,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0016818575871487458,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.0049104637776811915,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9722222288449606,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0007771729336430629,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0001681857587148746,
"train_probe_steps_per_second": 0.032
},
{
"calibration/aurc": 0.16302321103458023,
"calibration/batch_distribution_entropy": 0.9463972655806352,
"calibration/buffer_distribution_entropy": 0.9836929635418779,
"calibration/confidence_entropy": 0.5061289924018656,
"calibration/coverage@0%": 0.055265748031496066,
"calibration/coverage@1%": 0.055265748031496066,
"calibration/coverage@10%": 0.33877132545931754,
"calibration/coverage@15%": 0.5459153543307086,
"calibration/coverage@20%": 0.7237942913385826,
"calibration/coverage@25%": 0.8281044947506562,
"calibration/coverage@30%": 0.9072916666666666,
"calibration/coverage@5%": 0.14729740813648293,
"calibration/ece": 0.1197935157480315,
"calibration/mean_confidence": 0.5964368398950131,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000694444444444442,
"completions/max_length": 3647.2,
"completions/max_terminated_length": 3647.2,
"completions/mean_length": 1137.7447021484375,
"completions/mean_terminated_length": 1138.55546875,
"completions/min_length": 140.4,
"completions/min_terminated_length": 322.2,
"epoch": 1.8135898301271234,
"grad_norm": 0.0029852569568902254,
"learning_rate": 1.7127403846153848e-06,
"loss": 0.0088,
"num_tokens": 2034969545.0,
"reward": 1.0164843440055846,
"reward_std": 0.09799668043851853,
"rewards/accuracy_reward": 0.7115451455116272,
"rewards/brier_reward": 0.8474408864974976,
"rewards/confidence_uniqueness_reward": 0.9490378737449646,
"rewards/format_reward": 0.9993055462837219,
"rewards/frontier_coverage_0": 0.0392310387454927,
"rewards/frontier_coverage_1": 0.0392310387454927,
"rewards/frontier_coverage_10": 0.05753873959183693,
"rewards/frontier_coverage_15": 0.11426883339881896,
"rewards/frontier_coverage_20": 0.1901752233505249,
"rewards/frontier_coverage_25": 0.2787013977766037,
"rewards/frontier_coverage_5": 0.03928067879751325,
"rewards/frontier_entropy_batch_reward": -0.2943439185619354,
"signal/accuracy_reward/centered_abs_mean": 0.11543511301279068,
"signal/accuracy_reward/group_std_mean": 0.15154503285884857,
"signal/accuracy_reward/group_zero_std_frac": 0.5750000178813934,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9787806272506714,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05771755650639534,
"signal/advantage_abs_mean": 0.7752339601516723,
"signal/advantage_pre_scale_abs_mean": 0.0760658174753189,
"signal/advantage_pre_scale_std": 0.12216138690710068,
"signal/advantage_std": 0.9827526807785034,
"signal/brier_reward/centered_abs_mean": 0.0946065753698349,
"signal/brier_reward/group_std_mean": 0.12359896749258041,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1623237133026123,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009460657835006714,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01507211085408926,
"signal/confidence_uniqueness_reward/group_std_mean": 0.020275114849209786,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026477007195353508,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00150721101090312,
"signal/format_reward/centered_abs_mean": 0.0013129340135492385,
"signal/format_reward/group_std_mean": 0.0032778555527329446,
"signal/format_reward/group_zero_std_frac": 0.9833333253860473,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012105725053697825,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006564670067746193,
"signal/frontier_coverage_0/centered_abs_mean": 0.14191508293151855,
"signal/frontier_coverage_0/group_std_mean": 0.18624544739723206,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03474088981747627,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020293857669457793,
"signal/frontier_coverage_1/centered_abs_mean": 0.14191508293151855,
"signal/frontier_coverage_1/group_std_mean": 0.18624544739723206,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03474088981747627,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020293857669457793,
"signal/frontier_coverage_10/centered_abs_mean": 0.05707173347473145,
"signal/frontier_coverage_10/group_std_mean": 0.07210961431264877,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01410923469811678,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000816125818528235,
"signal/frontier_coverage_15/centered_abs_mean": 0.07441399842500687,
"signal/frontier_coverage_15/group_std_mean": 0.09240110963582993,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018513403832912445,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010641201632097363,
"signal/frontier_coverage_20/centered_abs_mean": 0.10533722341060639,
"signal/frontier_coverage_20/group_std_mean": 0.13098296225070954,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.026201526075601576,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001506322273053229,
"signal/frontier_coverage_25/centered_abs_mean": 0.143466717004776,
"signal/frontier_coverage_25/group_std_mean": 0.17837926149368286,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03564382195472717,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00205157408490777,
"signal/frontier_coverage_5/centered_abs_mean": 0.1416635975241661,
"signal/frontier_coverage_5/group_std_mean": 0.1859228640794754,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03467725887894631,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020257893018424513,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32274038195610044,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3907089829444885,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.561868679523468,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032274038344621656,
"step": 755
},
{
"calibration/aurc": 0.1406128362914006,
"calibration/batch_distribution_entropy": 0.9271639045183393,
"calibration/buffer_distribution_entropy": 0.9835649008372214,
"calibration/confidence_entropy": 0.49665028010211987,
"calibration/coverage@0%": 0.08821393603133158,
"calibration/coverage@1%": 0.14774396214099217,
"calibration/coverage@10%": 0.390082408616188,
"calibration/coverage@15%": 0.5203532963446474,
"calibration/coverage@20%": 0.782351501305483,
"calibration/coverage@25%": 0.906036499129678,
"calibration/coverage@30%": 0.9712793733681462,
"calibration/coverage@5%": 0.22753073324630116,
"calibration/ece": 0.14779586510008708,
"calibration/mean_confidence": 0.6198723345844213,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.001128472222222232,
"completions/max_length": 3703.0,
"completions/max_terminated_length": 3703.0,
"completions/mean_length": 1151.5933349609375,
"completions/mean_terminated_length": 1152.89443359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 280.6,
"epoch": 1.8255896801289984,
"grad_norm": 0.0028999089263379574,
"learning_rate": 1.682692307692308e-06,
"loss": -0.0036,
"num_tokens": 2051306492.0,
"reward": 1.0116494297981262,
"reward_std": 0.0956185519695282,
"rewards/accuracy_reward": 0.7160590291023254,
"rewards/brier_reward": 0.8324668526649475,
"rewards/confidence_uniqueness_reward": 0.9463574290275574,
"rewards/format_reward": 0.9987847089767456,
"rewards/frontier_coverage_0": 0.02435053661465645,
"rewards/frontier_coverage_1": 0.02435053661465645,
"rewards/frontier_coverage_10": 0.054039137065410615,
"rewards/frontier_coverage_15": 0.11133374571800232,
"rewards/frontier_coverage_20": 0.18582258224487305,
"rewards/frontier_coverage_25": 0.2720319747924805,
"rewards/frontier_coverage_5": 0.02443299610167742,
"rewards/frontier_entropy_batch_reward": -0.336128431558609,
"signal/accuracy_reward/centered_abs_mean": 0.113134765625,
"signal/accuracy_reward/group_std_mean": 0.1469035863876343,
"signal/accuracy_reward/group_zero_std_frac": 0.5833333492279053,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9879292249679565,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0565673828125,
"signal/advantage_abs_mean": 0.7799701571464539,
"signal/advantage_pre_scale_abs_mean": 0.07455487251281738,
"signal/advantage_pre_scale_std": 0.11972530782222748,
"signal/advantage_std": 0.9827275753021241,
"signal/brier_reward/centered_abs_mean": 0.10523280948400497,
"signal/brier_reward/group_std_mean": 0.13444634526968002,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18512049913406373,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010523280873894692,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.017059031501412393,
"signal/confidence_uniqueness_reward/group_std_mean": 0.023790639638900758,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030060911551117897,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017059032339602708,
"signal/format_reward/centered_abs_mean": 0.002278645837213844,
"signal/format_reward/group_std_mean": 0.005544885993003845,
"signal/format_reward/group_zero_std_frac": 0.9722222089767456,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.019949254114180803,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001139322918606922,
"signal/frontier_coverage_0/centered_abs_mean": 0.14627977907657624,
"signal/frontier_coverage_0/group_std_mean": 0.185261470079422,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036748398840427396,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020918007707223297,
"signal/frontier_coverage_1/centered_abs_mean": 0.14627977907657624,
"signal/frontier_coverage_1/group_std_mean": 0.185261470079422,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036748398840427396,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020918007707223297,
"signal/frontier_coverage_10/centered_abs_mean": 0.0604740172624588,
"signal/frontier_coverage_10/group_std_mean": 0.0752995565533638,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015242612175643443,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008647784125059843,
"signal/frontier_coverage_15/centered_abs_mean": 0.08006558865308762,
"signal/frontier_coverage_15/group_std_mean": 0.098976169526577,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.020191213116049767,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011449379147961737,
"signal/frontier_coverage_20/centered_abs_mean": 0.11131712347269059,
"signal/frontier_coverage_20/group_std_mean": 0.13847638368606568,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.028045033290982246,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015918348217383027,
"signal/frontier_coverage_25/centered_abs_mean": 0.14897879362106323,
"signal/frontier_coverage_25/group_std_mean": 0.18610316812992095,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03750268965959549,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002130396710708737,
"signal/frontier_coverage_5/centered_abs_mean": 0.14582152664661407,
"signal/frontier_coverage_5/group_std_mean": 0.18467966616153716,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.036631081253290176,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00208524779882282,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3315341889858246,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3946572482585907,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5843674898147583,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03315341994166374,
"step": 760
},
{
"calibration/aurc": 0.10008487753171016,
"calibration/batch_distribution_entropy": 0.9277099916065437,
"calibration/buffer_distribution_entropy": 0.9826846621379814,
"calibration/confidence_entropy": 0.490593213002777,
"calibration/coverage@0%": 0.2011480148342059,
"calibration/coverage@1%": 0.2521896815008726,
"calibration/coverage@10%": 0.45394033595113437,
"calibration/coverage@15%": 0.7749072862129144,
"calibration/coverage@20%": 0.8547420375218149,
"calibration/coverage@25%": 0.975,
"calibration/coverage@30%": 0.99375,
"calibration/coverage@5%": 0.3809200479930192,
"calibration/ece": 0.19351178340423209,
"calibration/mean_confidence": 0.6179748778086823,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0012152777777777902,
"completions/max_length": 3756.2,
"completions/max_terminated_length": 3756.2,
"completions/mean_length": 1263.1088623046876,
"completions/mean_terminated_length": 1264.64208984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 371.4,
"epoch": 1.8375895301308733,
"grad_norm": 0.0026731251273304224,
"learning_rate": 1.6526442307692309e-06,
"loss": 0.0046,
"num_tokens": 2068969922.0,
"reward": 1.014076590538025,
"reward_std": 0.09386872947216034,
"rewards/accuracy_reward": 0.7167534589767456,
"rewards/brier_reward": 0.8214235782623291,
"rewards/confidence_uniqueness_reward": 0.9496928334236145,
"rewards/format_reward": 0.9987847208976746,
"rewards/frontier_coverage_0": 0.015309961698949336,
"rewards/frontier_coverage_1": 0.015309961698949336,
"rewards/frontier_coverage_10": 0.0462947279214859,
"rewards/frontier_coverage_15": 0.10164597630500793,
"rewards/frontier_coverage_20": 0.17245526611804962,
"rewards/frontier_coverage_25": 0.2544937252998352,
"rewards/frontier_coverage_5": 0.015843074396252634,
"rewards/frontier_entropy_batch_reward": -0.29689528942108157,
"signal/accuracy_reward/centered_abs_mean": 0.10371636152267456,
"signal/accuracy_reward/group_std_mean": 0.14323081374168395,
"signal/accuracy_reward/group_zero_std_frac": 0.5666666626930237,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8753082990646363,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05185818076133728,
"signal/advantage_abs_mean": 0.764187490940094,
"signal/advantage_pre_scale_abs_mean": 0.07085417807102204,
"signal/advantage_pre_scale_std": 0.11614946275949478,
"signal/advantage_std": 0.9827946066856384,
"signal/brier_reward/centered_abs_mean": 0.10038460642099381,
"signal/brier_reward/group_std_mean": 0.1296519249677658,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16992796957492828,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010038460791110992,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015540579706430436,
"signal/confidence_uniqueness_reward/group_std_mean": 0.021095557510852812,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026413920521736144,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015540578635409475,
"signal/format_reward/centered_abs_mean": 0.002202690974809229,
"signal/format_reward/group_std_mean": 0.004494689032435417,
"signal/format_reward/group_zero_std_frac": 0.9805555462837219,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.018810847867280246,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0011013454874046146,
"signal/frontier_coverage_0/centered_abs_mean": 0.14608448445796968,
"signal/frontier_coverage_0/group_std_mean": 0.1897787034511566,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03529713377356529,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002089008130133152,
"signal/frontier_coverage_1/centered_abs_mean": 0.14608448445796968,
"signal/frontier_coverage_1/group_std_mean": 0.1897787034511566,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03529713377356529,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002089008130133152,
"signal/frontier_coverage_10/centered_abs_mean": 0.05769690573215484,
"signal/frontier_coverage_10/group_std_mean": 0.07240410298109054,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013960456103086471,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000825065781828016,
"signal/frontier_coverage_15/centered_abs_mean": 0.07257926762104035,
"signal/frontier_coverage_15/group_std_mean": 0.08965266942977905,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017614268139004706,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010378835606388748,
"signal/frontier_coverage_20/centered_abs_mean": 0.09984047561883927,
"signal/frontier_coverage_20/group_std_mean": 0.12426990419626235,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024254824593663217,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014277187641710044,
"signal/frontier_coverage_25/centered_abs_mean": 0.13424190729856492,
"signal/frontier_coverage_25/group_std_mean": 0.16846884191036224,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03261243365705013,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019196592504158616,
"signal/frontier_coverage_5/centered_abs_mean": 0.1453851044178009,
"signal/frontier_coverage_5/group_std_mean": 0.1888649046421051,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035127484053373334,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020790069829672575,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33021358251571653,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3971860229969025,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5606383442878723,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033021359145641326,
"step": 765
},
{
"calibration/aurc": 0.09759682681677295,
"calibration/batch_distribution_entropy": 0.9441530418407963,
"calibration/buffer_distribution_entropy": 0.982589112635219,
"calibration/confidence_entropy": 0.4846604716444439,
"calibration/coverage@0%": 0.14087299178769266,
"calibration/coverage@1%": 0.23946959753181796,
"calibration/coverage@10%": 0.5813764628333183,
"calibration/coverage@15%": 0.7180358140950996,
"calibration/coverage@20%": 0.8662592569437845,
"calibration/coverage@25%": 0.9567708333333332,
"calibration/coverage@30%": 0.9848958333333332,
"calibration/coverage@5%": 0.42531747960872474,
"calibration/ece": 0.16580303715657335,
"calibration/mean_confidence": 0.6179463742795799,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008420138888888906,
"completions/max_length": 3919.4,
"completions/max_terminated_length": 3919.4,
"completions/mean_length": 1410.801220703125,
"completions/mean_terminated_length": 1422.8932373046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 377.6,
"epoch": 1.8495893801327483,
"grad_norm": 0.002413135953247547,
"learning_rate": 1.622596153846154e-06,
"loss": -0.0172,
"num_tokens": 2088303696.0,
"reward": 1.0102350831031799,
"reward_std": 0.10468598753213883,
"rewards/accuracy_reward": 0.7231770753860474,
"rewards/brier_reward": 0.820259952545166,
"rewards/confidence_uniqueness_reward": 0.9406996846199036,
"rewards/format_reward": 0.9915798664093017,
"rewards/frontier_coverage_0": 0.01387081373250112,
"rewards/frontier_coverage_1": 0.01387081373250112,
"rewards/frontier_coverage_10": 0.049073401093482974,
"rewards/frontier_coverage_15": 0.10792291015386582,
"rewards/frontier_coverage_20": 0.1824491500854492,
"rewards/frontier_coverage_25": 0.2690129905939102,
"rewards/frontier_coverage_5": 0.014235794026171788,
"rewards/frontier_entropy_batch_reward": -0.3254063129425049,
"signal/accuracy_reward/centered_abs_mean": 0.11168077290058136,
"signal/accuracy_reward/group_std_mean": 0.1492277979850769,
"signal/accuracy_reward/group_zero_std_frac": 0.5638889074325562,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9240718126296997,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05584038645029068,
"signal/advantage_abs_mean": 0.7747442603111268,
"signal/advantage_pre_scale_abs_mean": 0.08016434460878372,
"signal/advantage_pre_scale_std": 0.13522610068321228,
"signal/advantage_std": 0.98283451795578,
"signal/brier_reward/centered_abs_mean": 0.10873527824878693,
"signal/brier_reward/group_std_mean": 0.13798998296260834,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17993208169937133,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010873528011143208,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02466178871691227,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03505043126642704,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04082990363240242,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024661787785589693,
"signal/format_reward/centered_abs_mean": 0.01201714426279068,
"signal/format_reward/group_std_mean": 0.019468109123408794,
"signal/format_reward/group_zero_std_frac": 0.925000011920929,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0995325818657875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00600857213139534,
"signal/frontier_coverage_0/centered_abs_mean": 0.14618025720119476,
"signal/frontier_coverage_0/group_std_mean": 0.1870233803987503,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.034592658281326294,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020903776632621883,
"signal/frontier_coverage_1/centered_abs_mean": 0.14618025720119476,
"signal/frontier_coverage_1/group_std_mean": 0.1870233803987503,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.034592658281326294,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020903776632621883,
"signal/frontier_coverage_10/centered_abs_mean": 0.05835925862193107,
"signal/frontier_coverage_10/group_std_mean": 0.0733158677816391,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013812579214572906,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008345374371856451,
"signal/frontier_coverage_15/centered_abs_mean": 0.07714459002017975,
"signal/frontier_coverage_15/group_std_mean": 0.09536905735731124,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018274228647351266,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011031676083803176,
"signal/frontier_coverage_20/centered_abs_mean": 0.10645209103822709,
"signal/frontier_coverage_20/group_std_mean": 0.13283228129148483,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025221217051148416,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015222648857161404,
"signal/frontier_coverage_25/centered_abs_mean": 0.14250740706920623,
"signal/frontier_coverage_25/group_std_mean": 0.17897864580154418,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03375965058803558,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020378559362143277,
"signal/frontier_coverage_5/centered_abs_mean": 0.1453123450279236,
"signal/frontier_coverage_5/group_std_mean": 0.18592797815799714,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03438692018389702,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00207796657923609,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3373031973838806,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40034814476966857,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.55850750207901,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033730319887399676,
"step": 770
},
{
"calibration/aurc": 0.08260132080338653,
"calibration/batch_distribution_entropy": 0.9475478623002089,
"calibration/buffer_distribution_entropy": 0.982594037282311,
"calibration/confidence_entropy": 0.48343041044238033,
"calibration/coverage@0%": 0.1554240440457745,
"calibration/coverage@1%": 0.2635180388238424,
"calibration/coverage@10%": 0.722244240603098,
"calibration/coverage@15%": 0.7966044837091772,
"calibration/coverage@20%": 0.8446356305902141,
"calibration/coverage@25%": 0.8989537070808928,
"calibration/coverage@30%": 0.9299806189076503,
"calibration/coverage@5%": 0.6204585371542952,
"calibration/ece": 0.22259992062477957,
"calibration/mean_confidence": 0.5704307778797377,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.007899305555555559,
"completions/max_length": 4067.8,
"completions/max_terminated_length": 4067.8,
"completions/mean_length": 1538.70595703125,
"completions/mean_terminated_length": 1550.9579833984376,
"completions/min_length": 0.0,
"completions/min_terminated_length": 450.6,
"epoch": 1.8615892301346233,
"grad_norm": 0.0024537527933716774,
"learning_rate": 1.592548076923077e-06,
"loss": -0.0182,
"num_tokens": 2109139444.0,
"reward": 1.02263902425766,
"reward_std": 0.10763338208198547,
"rewards/accuracy_reward": 0.7459201335906982,
"rewards/brier_reward": 0.813153886795044,
"rewards/confidence_uniqueness_reward": 0.9420896649360657,
"rewards/format_reward": 0.9921006917953491,
"rewards/frontier_coverage_0": -0.008578380825929344,
"rewards/frontier_coverage_1": -0.008578380825929344,
"rewards/frontier_coverage_10": 0.047513436526060104,
"rewards/frontier_coverage_15": 0.1067799985408783,
"rewards/frontier_coverage_20": 0.18154163658618927,
"rewards/frontier_coverage_25": 0.26863664388656616,
"rewards/frontier_coverage_5": -0.007999213365837931,
"rewards/frontier_entropy_batch_reward": -0.3018000781536102,
"signal/accuracy_reward/centered_abs_mean": 0.12080620676279068,
"signal/accuracy_reward/group_std_mean": 0.15733032971620559,
"signal/accuracy_reward/group_zero_std_frac": 0.5583333432674408,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0052472591400146,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06040310338139534,
"signal/advantage_abs_mean": 0.767851459980011,
"signal/advantage_pre_scale_abs_mean": 0.08170493394136429,
"signal/advantage_pre_scale_std": 0.1378851354122162,
"signal/advantage_std": 0.9828094124794007,
"signal/brier_reward/centered_abs_mean": 0.11120356619358063,
"signal/brier_reward/group_std_mean": 0.14290755689144136,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1852889508008957,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011120356805622577,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02482622042298317,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03989123106002808,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.041999526694417,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024826219072565437,
"signal/format_reward/centered_abs_mean": 0.013058810774236918,
"signal/format_reward/group_std_mean": 0.025868096575140952,
"signal/format_reward/group_zero_std_frac": 0.8861111164093017,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.11041791215538979,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006529405387118459,
"signal/frontier_coverage_0/centered_abs_mean": 0.15929721891880036,
"signal/frontier_coverage_0/group_std_mean": 0.20568397343158723,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.037854181975126265,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022779503371566532,
"signal/frontier_coverage_1/centered_abs_mean": 0.15929721891880036,
"signal/frontier_coverage_1/group_std_mean": 0.20568397343158723,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.037854181975126265,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022779503371566532,
"signal/frontier_coverage_10/centered_abs_mean": 0.06027785316109657,
"signal/frontier_coverage_10/group_std_mean": 0.07613334357738495,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01442383099347353,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008619732921943069,
"signal/frontier_coverage_15/centered_abs_mean": 0.07339970767498016,
"signal/frontier_coverage_15/group_std_mean": 0.09090597331523895,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017760027572512625,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010496158269234003,
"signal/frontier_coverage_20/centered_abs_mean": 0.1007804036140442,
"signal/frontier_coverage_20/group_std_mean": 0.12484865486621857,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024397116526961325,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014411597978323698,
"signal/frontier_coverage_25/centered_abs_mean": 0.13568875044584275,
"signal/frontier_coverage_25/group_std_mean": 0.16859400570392608,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03280252404510975,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001940349186770618,
"signal/frontier_coverage_5/centered_abs_mean": 0.15811103582382202,
"signal/frontier_coverage_5/group_std_mean": 0.20422449707984924,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0375734880566597,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002260987856425345,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31984294652938844,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3863619029521942,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5378746867179871,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0319842953234911,
"step": 775
},
{
"calibration/aurc": 0.16485442668940056,
"calibration/batch_distribution_entropy": 0.9528827240900295,
"calibration/buffer_distribution_entropy": 0.9814258866145845,
"calibration/confidence_entropy": 0.47629329018267175,
"calibration/coverage@0%": 0.017224309267334672,
"calibration/coverage@1%": 0.017224309267334672,
"calibration/coverage@10%": 0.278715365113757,
"calibration/coverage@15%": 0.4485615693022387,
"calibration/coverage@20%": 0.7906145837605202,
"calibration/coverage@25%": 0.8919764412145321,
"calibration/coverage@30%": 0.9786417536988686,
"calibration/coverage@5%": 0.022432642600668008,
"calibration/ece": 0.14761790194580762,
"calibration/mean_confidence": 0.6011017510716694,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.005902777777777768,
"completions/max_length": 4064.0,
"completions/max_terminated_length": 4064.0,
"completions/mean_length": 1609.5780517578125,
"completions/mean_terminated_length": 1619.094287109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 517.0,
"epoch": 1.8735890801364983,
"grad_norm": 0.0023489040322601795,
"learning_rate": 1.5625e-06,
"loss": -0.0162,
"num_tokens": 2130754455.0,
"reward": 1.02072274684906,
"reward_std": 0.10271647274494171,
"rewards/accuracy_reward": 0.7302083373069763,
"rewards/brier_reward": 0.8394347190856933,
"rewards/confidence_uniqueness_reward": 0.9440023183822632,
"rewards/format_reward": 0.9940972328186035,
"rewards/frontier_coverage_0": 0.023285062378272416,
"rewards/frontier_coverage_1": 0.023285062378272416,
"rewards/frontier_coverage_10": 0.05837507769465446,
"rewards/frontier_coverage_15": 0.11961217522621155,
"rewards/frontier_coverage_20": 0.1975026994943619,
"rewards/frontier_coverage_25": 0.2890557885169983,
"rewards/frontier_coverage_5": 0.023518830770626664,
"rewards/frontier_entropy_batch_reward": -0.3027906119823456,
"signal/accuracy_reward/centered_abs_mean": 0.10797525942325592,
"signal/accuracy_reward/group_std_mean": 0.1463481605052948,
"signal/accuracy_reward/group_zero_std_frac": 0.5666666865348816,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.917884886264801,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05398762971162796,
"signal/advantage_abs_mean": 0.7607001185417175,
"signal/advantage_pre_scale_abs_mean": 0.07600368112325669,
"signal/advantage_pre_scale_std": 0.13057213723659516,
"signal/advantage_std": 0.9827877879142761,
"signal/brier_reward/centered_abs_mean": 0.10208135396242142,
"signal/brier_reward/group_std_mean": 0.1331118553876877,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17384454905986785,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010208135098218917,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02246842011809349,
"signal/confidence_uniqueness_reward/group_std_mean": 0.035991473495960234,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03830303549766541,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002246842044405639,
"signal/format_reward/centered_abs_mean": 0.010036892350763082,
"signal/format_reward/group_std_mean": 0.02093004286289215,
"signal/format_reward/group_zero_std_frac": 0.9055555701255799,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08556498661637306,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005018446175381541,
"signal/frontier_coverage_0/centered_abs_mean": 0.1399555742740631,
"signal/frontier_coverage_0/group_std_mean": 0.18119101524353026,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03407430574297905,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020013647386804222,
"signal/frontier_coverage_1/centered_abs_mean": 0.1399555742740631,
"signal/frontier_coverage_1/group_std_mean": 0.18119101524353026,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03407430574297905,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020013647386804222,
"signal/frontier_coverage_10/centered_abs_mean": 0.0573968268930912,
"signal/frontier_coverage_10/group_std_mean": 0.0724210187792778,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013992871344089507,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00082077463157475,
"signal/frontier_coverage_15/centered_abs_mean": 0.07783315032720566,
"signal/frontier_coverage_15/group_std_mean": 0.096867735683918,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018964045867323875,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011130140395835043,
"signal/frontier_coverage_20/centered_abs_mean": 0.10844850391149521,
"signal/frontier_coverage_20/group_std_mean": 0.1354563981294632,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.026414349675178528,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015508136246353387,
"signal/frontier_coverage_25/centered_abs_mean": 0.14559023976325988,
"signal/frontier_coverage_25/group_std_mean": 0.1825695514678955,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03545608147978783,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020819404162466526,
"signal/frontier_coverage_5/centered_abs_mean": 0.1386626899242401,
"signal/frontier_coverage_5/group_std_mean": 0.17956798374652863,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.033758468180894854,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019828763790428637,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32271628379821776,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3866087257862091,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5493596196174622,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032271627336740494,
"step": 780
},
{
"calibration/aurc": 0.1465034067905237,
"calibration/batch_distribution_entropy": 0.9576631905455045,
"calibration/buffer_distribution_entropy": 0.9795268357527442,
"calibration/confidence_entropy": 0.4937558184431149,
"calibration/coverage@0%": 0.08272828751193013,
"calibration/coverage@1%": 0.08272828751193013,
"calibration/coverage@10%": 0.38023853512743166,
"calibration/coverage@15%": 0.5750353464069988,
"calibration/coverage@20%": 0.6699505205242648,
"calibration/coverage@25%": 0.8307626851522694,
"calibration/coverage@30%": 0.9249811051105523,
"calibration/coverage@5%": 0.2943209485996614,
"calibration/ece": 0.16532390242811154,
"calibration/mean_confidence": 0.6030139186483918,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010243055555555557,
"completions/max_length": 4063.2,
"completions/max_terminated_length": 4063.2,
"completions/mean_length": 1720.489404296875,
"completions/mean_terminated_length": 1738.4248291015624,
"completions/min_length": 0.0,
"completions/min_terminated_length": 532.2,
"epoch": 1.8855889301383733,
"grad_norm": 0.0023424793034791946,
"learning_rate": 1.5324519230769232e-06,
"loss": -0.0277,
"num_tokens": 2153651869.0,
"reward": 1.0122368812561036,
"reward_std": 0.1105235531926155,
"rewards/accuracy_reward": 0.7264757037162781,
"rewards/brier_reward": 0.8275084495544434,
"rewards/confidence_uniqueness_reward": 0.937448239326477,
"rewards/format_reward": 0.9897569417953491,
"rewards/frontier_coverage_0": 0.0164753757417202,
"rewards/frontier_coverage_1": 0.0164753757417202,
"rewards/frontier_coverage_10": 0.05674448758363724,
"rewards/frontier_coverage_15": 0.12070612460374833,
"rewards/frontier_coverage_20": 0.19932154715061187,
"rewards/frontier_coverage_25": 0.2911352813243866,
"rewards/frontier_coverage_5": 0.01682386063039303,
"rewards/frontier_entropy_batch_reward": -0.32637971043586733,
"signal/accuracy_reward/centered_abs_mean": 0.11615125834941864,
"signal/accuracy_reward/group_std_mean": 0.15916134268045426,
"signal/accuracy_reward/group_zero_std_frac": 0.5250000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.935201108455658,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05807562917470932,
"signal/advantage_abs_mean": 0.7558478832244873,
"signal/advantage_pre_scale_abs_mean": 0.08194233477115631,
"signal/advantage_pre_scale_std": 0.14115980565547942,
"signal/advantage_std": 0.9828650236129761,
"signal/brier_reward/centered_abs_mean": 0.10771840810775757,
"signal/brier_reward/group_std_mean": 0.14069525003433228,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1741844743490219,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010771840997040271,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02886640131473541,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04311688244342804,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04669438749551773,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002886640280485153,
"signal/format_reward/centered_abs_mean": 0.01532118059694767,
"signal/format_reward/group_std_mean": 0.0267114520072937,
"signal/format_reward/group_zero_std_frac": 0.8944444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.12298648655414582,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007660590298473835,
"signal/frontier_coverage_0/centered_abs_mean": 0.14083233028650283,
"signal/frontier_coverage_0/group_std_mean": 0.18645595610141755,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03259415253996849,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002013902342878282,
"signal/frontier_coverage_1/centered_abs_mean": 0.14083233028650283,
"signal/frontier_coverage_1/group_std_mean": 0.18645595610141755,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03259415253996849,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002013902342878282,
"signal/frontier_coverage_10/centered_abs_mean": 0.060231783986091615,
"signal/frontier_coverage_10/group_std_mean": 0.07518986761569976,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013987057469785213,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008613145095296204,
"signal/frontier_coverage_15/centered_abs_mean": 0.08069218844175338,
"signal/frontier_coverage_15/group_std_mean": 0.09967537224292755,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01876749433577061,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011538982624188065,
"signal/frontier_coverage_20/centered_abs_mean": 0.11142556518316268,
"signal/frontier_coverage_20/group_std_mean": 0.13851696103811265,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02588377967476845,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015933856135234236,
"signal/frontier_coverage_25/centered_abs_mean": 0.14959222674369813,
"signal/frontier_coverage_25/group_std_mean": 0.18737058937549592,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03470103591680527,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021391689078882337,
"signal/frontier_coverage_5/centered_abs_mean": 0.13975331783294678,
"signal/frontier_coverage_5/group_std_mean": 0.18505517840385438,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032344093546271324,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001998472446575761,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3308142781257629,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39403237104415895,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5391006350517273,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03308142870664597,
"step": 785
},
{
"calibration/aurc": 0.1939321799769151,
"calibration/batch_distribution_entropy": 0.9231152057043847,
"calibration/buffer_distribution_entropy": 0.9792681924919983,
"calibration/confidence_entropy": 0.4568647554553452,
"calibration/coverage@0%": 0.14125862699222508,
"calibration/coverage@1%": 0.1814675042768204,
"calibration/coverage@10%": 0.28295346780291786,
"calibration/coverage@15%": 0.3981430680416006,
"calibration/coverage@20%": 0.7165041515188754,
"calibration/coverage@25%": 0.7863090890430093,
"calibration/coverage@30%": 0.8315137573932498,
"calibration/coverage@5%": 0.20597930801920508,
"calibration/ece": 0.12468192539573246,
"calibration/mean_confidence": 0.5811314275559181,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013281250000000022,
"completions/max_length": 4051.0,
"completions/max_terminated_length": 4051.0,
"completions/mean_length": 1706.2888427734374,
"completions/mean_terminated_length": 1730.19765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 504.6,
"epoch": 1.8975887801402482,
"grad_norm": 0.0025702903512865305,
"learning_rate": 1.5024038461538462e-06,
"loss": -0.0296,
"num_tokens": 2176396092.0,
"reward": 0.9944682359695435,
"reward_std": 0.12540389597415924,
"rewards/accuracy_reward": 0.6935763835906983,
"rewards/brier_reward": 0.8150904059410096,
"rewards/confidence_uniqueness_reward": 0.9365580677986145,
"rewards/format_reward": 0.98671875,
"rewards/frontier_coverage_0": 0.02789465319365263,
"rewards/frontier_coverage_1": 0.02789465319365263,
"rewards/frontier_coverage_10": 0.05078333094716072,
"rewards/frontier_coverage_15": 0.10327455699443817,
"rewards/frontier_coverage_20": 0.17147523760795594,
"rewards/frontier_coverage_25": 0.25191813707351685,
"rewards/frontier_coverage_5": 0.027757696248590945,
"rewards/frontier_entropy_batch_reward": -0.30296459794044495,
"signal/accuracy_reward/centered_abs_mean": 0.1440972253680229,
"signal/accuracy_reward/group_std_mean": 0.18764382898807525,
"signal/accuracy_reward/group_zero_std_frac": 0.472222226858139,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0437634110450744,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07204861268401146,
"signal/advantage_abs_mean": 0.7594870686531067,
"signal/advantage_pre_scale_abs_mean": 0.0951348215341568,
"signal/advantage_pre_scale_std": 0.15686687529087068,
"signal/advantage_std": 0.9830270886421204,
"signal/brier_reward/centered_abs_mean": 0.12329903990030289,
"signal/brier_reward/group_std_mean": 0.15922830402851104,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17916857302188874,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012329904362559319,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03225949928164482,
"signal/confidence_uniqueness_reward/group_std_mean": 0.050067192316055296,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04653703421354294,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003225949825718999,
"signal/format_reward/centered_abs_mean": 0.020838758908212185,
"signal/format_reward/group_std_mean": 0.0363934725522995,
"signal/format_reward/group_zero_std_frac": 0.8555555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.14907054007053375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010419379454106092,
"signal/frontier_coverage_0/centered_abs_mean": 0.16697318553924562,
"signal/frontier_coverage_0/group_std_mean": 0.2151999741792679,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03466854318976402,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023877166211605074,
"signal/frontier_coverage_1/centered_abs_mean": 0.16697318553924562,
"signal/frontier_coverage_1/group_std_mean": 0.2151999741792679,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03466854318976402,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023877166211605074,
"signal/frontier_coverage_10/centered_abs_mean": 0.06191762536764145,
"signal/frontier_coverage_10/group_std_mean": 0.07758147418498992,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012927941419184208,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008854220737703145,
"signal/frontier_coverage_15/centered_abs_mean": 0.0792486310005188,
"signal/frontier_coverage_15/group_std_mean": 0.09801900982856751,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016577761620283127,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001133255404420197,
"signal/frontier_coverage_20/centered_abs_mean": 0.11002731919288636,
"signal/frontier_coverage_20/group_std_mean": 0.1369099199771881,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02301861494779587,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015733906766399741,
"signal/frontier_coverage_25/centered_abs_mean": 0.1496077835559845,
"signal/frontier_coverage_25/group_std_mean": 0.1872713029384613,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03128085993230343,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00213939119130373,
"signal/frontier_coverage_5/centered_abs_mean": 0.16589560508728027,
"signal/frontier_coverage_5/group_std_mean": 0.21386311054229737,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0344485942274332,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023723070975393058,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3218592584133148,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3871870756149292,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.46996867656707764,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03218592554330826,
"step": 790
},
{
"calibration/aurc": 0.17420068402511013,
"calibration/batch_distribution_entropy": 0.936744799448956,
"calibration/buffer_distribution_entropy": 0.9791764432986675,
"calibration/confidence_entropy": 0.4700765218311792,
"calibration/coverage@0%": 0.044497148525959325,
"calibration/coverage@1%": 0.044497148525959325,
"calibration/coverage@10%": 0.370861900013158,
"calibration/coverage@15%": 0.48271767810026384,
"calibration/coverage@20%": 0.5339723504837293,
"calibration/coverage@25%": 0.7436015831134565,
"calibration/coverage@30%": 0.8457893579595428,
"calibration/coverage@5%": 0.21455714721016075,
"calibration/ece": 0.13920112303625717,
"calibration/mean_confidence": 0.6015508142066427,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00894097222222221,
"completions/max_length": 4013.4,
"completions/max_terminated_length": 4013.4,
"completions/mean_length": 1564.388037109375,
"completions/mean_terminated_length": 1578.45361328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 519.6,
"epoch": 1.9095886301421232,
"grad_norm": 0.002500551752746105,
"learning_rate": 1.4723557692307693e-06,
"loss": -0.026,
"num_tokens": 2197491826.0,
"reward": 0.9953348517417908,
"reward_std": 0.1119878500699997,
"rewards/accuracy_reward": 0.697743046283722,
"rewards/brier_reward": 0.8237994909286499,
"rewards/confidence_uniqueness_reward": 0.9378173589706421,
"rewards/format_reward": 0.9910590291023255,
"rewards/frontier_coverage_0": 0.03404067233204842,
"rewards/frontier_coverage_1": 0.03404067233204842,
"rewards/frontier_coverage_10": 0.051751085370779035,
"rewards/frontier_coverage_15": 0.10530708134174346,
"rewards/frontier_coverage_20": 0.17555021941661836,
"rewards/frontier_coverage_25": 0.25901117622852327,
"rewards/frontier_coverage_5": 0.03414784893393517,
"rewards/frontier_entropy_batch_reward": -0.35149884819984434,
"signal/accuracy_reward/centered_abs_mean": 0.1218315988779068,
"signal/accuracy_reward/group_std_mean": 0.15863377153873442,
"signal/accuracy_reward/group_zero_std_frac": 0.5583333373069763,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9944993138313294,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0609157994389534,
"signal/advantage_abs_mean": 0.7687033891677857,
"signal/advantage_pre_scale_abs_mean": 0.0843727320432663,
"signal/advantage_pre_scale_std": 0.1435972660779953,
"signal/advantage_std": 0.9828492641448975,
"signal/brier_reward/centered_abs_mean": 0.11527116298675537,
"signal/brier_reward/group_std_mean": 0.14814480543136596,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1880294054746628,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01152711659669876,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02728550471365452,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04235233888030052,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04486031234264374,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002728550648316741,
"signal/format_reward/centered_abs_mean": 0.01403537318110466,
"signal/format_reward/group_std_mean": 0.02631957270205021,
"signal/format_reward/group_zero_std_frac": 0.8916666626930236,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.11467897593975067,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00701768659055233,
"signal/frontier_coverage_0/centered_abs_mean": 0.15350857526063919,
"signal/frontier_coverage_0/group_std_mean": 0.1942602276802063,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035571636632084846,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002195172756910324,
"signal/frontier_coverage_1/centered_abs_mean": 0.15350857526063919,
"signal/frontier_coverage_1/group_std_mean": 0.1942602276802063,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035571636632084846,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002195172756910324,
"signal/frontier_coverage_10/centered_abs_mean": 0.06201315745711326,
"signal/frontier_coverage_10/group_std_mean": 0.07682908922433854,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014492305181920528,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008867881610058248,
"signal/frontier_coverage_15/centered_abs_mean": 0.08047257363796234,
"signal/frontier_coverage_15/group_std_mean": 0.09959491640329361,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01899382472038269,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011507577961310743,
"signal/frontier_coverage_20/centered_abs_mean": 0.11074172109365463,
"signal/frontier_coverage_20/group_std_mean": 0.13823194950819015,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.026179977133870123,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001583606656640768,
"signal/frontier_coverage_25/centered_abs_mean": 0.148504039645195,
"signal/frontier_coverage_25/group_std_mean": 0.1865270584821701,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03508494608104229,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002123607788234949,
"signal/frontier_coverage_5/centered_abs_mean": 0.15275688022375106,
"signal/frontier_coverage_5/group_std_mean": 0.19333274960517882,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03539147637784481,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002184423431754112,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3300867795944214,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3969386100769043,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5421956241130829,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033008677139878276,
"step": 795
},
{
"calibration/aurc": 0.1441095627724894,
"calibration/batch_distribution_entropy": 0.9483558736045365,
"calibration/buffer_distribution_entropy": 0.9798358679245261,
"calibration/confidence_entropy": 0.49228147452219206,
"calibration/coverage@0%": 0.12621647987116916,
"calibration/coverage@1%": 0.20550409929511188,
"calibration/coverage@10%": 0.5594735753813581,
"calibration/coverage@15%": 0.6802893555892628,
"calibration/coverage@20%": 0.7370080672589355,
"calibration/coverage@25%": 0.7793635170603675,
"calibration/coverage@30%": 0.8233267716535433,
"calibration/coverage@5%": 0.3705636289846919,
"calibration/ece": 0.14491320597761728,
"calibration/mean_confidence": 0.5801336887442534,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.007291666666666652,
"completions/max_length": 4023.6,
"completions/max_terminated_length": 4023.6,
"completions/mean_length": 1477.7800048828126,
"completions/mean_terminated_length": 1488.652392578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 478.6,
"epoch": 1.9215884801439982,
"grad_norm": 0.002651046961545944,
"learning_rate": 1.4423076923076922e-06,
"loss": -0.0157,
"num_tokens": 2217642443.0,
"reward": 1.0029356360435486,
"reward_std": 0.12168239057064056,
"rewards/accuracy_reward": 0.70390625,
"rewards/brier_reward": 0.829195499420166,
"rewards/confidence_uniqueness_reward": 0.9415722489356995,
"rewards/format_reward": 0.9927083253860474,
"rewards/frontier_coverage_0": 0.03227963969111443,
"rewards/frontier_coverage_1": 0.03227963969111443,
"rewards/frontier_coverage_10": 0.050942166894674304,
"rewards/frontier_coverage_15": 0.10139497518539428,
"rewards/frontier_coverage_20": 0.17052144110202788,
"rewards/frontier_coverage_25": 0.2535334646701813,
"rewards/frontier_coverage_5": 0.03230700695421547,
"rewards/frontier_entropy_batch_reward": -0.32076034545898435,
"signal/accuracy_reward/centered_abs_mean": 0.13656141459941865,
"signal/accuracy_reward/group_std_mean": 0.18021790385246278,
"signal/accuracy_reward/group_zero_std_frac": 0.4888889014720917,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0031284928321837,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06828070729970932,
"signal/advantage_abs_mean": 0.760795509815216,
"signal/advantage_pre_scale_abs_mean": 0.09117430299520493,
"signal/advantage_pre_scale_std": 0.1516057848930359,
"signal/advantage_std": 0.9829917192459107,
"signal/brier_reward/centered_abs_mean": 0.10941434502601624,
"signal/brier_reward/group_std_mean": 0.14455374777317048,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16310821771621703,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010941434279084205,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024841461703181265,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03970448262989521,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03761226050555706,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024841462261974813,
"signal/format_reward/centered_abs_mean": 0.012597656343132257,
"signal/format_reward/group_std_mean": 0.024988747760653497,
"signal/format_reward/group_zero_std_frac": 0.8916666746139527,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09697704315185547,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006298828171566129,
"signal/frontier_coverage_0/centered_abs_mean": 0.1431431382894516,
"signal/frontier_coverage_0/group_std_mean": 0.18829217851161956,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03046076148748398,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002046946971677244,
"signal/frontier_coverage_1/centered_abs_mean": 0.1431431382894516,
"signal/frontier_coverage_1/group_std_mean": 0.18829217851161956,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03046076148748398,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002046946971677244,
"signal/frontier_coverage_10/centered_abs_mean": 0.05590105578303337,
"signal/frontier_coverage_10/group_std_mean": 0.071537347137928,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.011949419602751732,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007993850973434746,
"signal/frontier_coverage_15/centered_abs_mean": 0.07858142256736755,
"signal/frontier_coverage_15/group_std_mean": 0.09816959351301194,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016770840622484684,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001123714353889227,
"signal/frontier_coverage_20/centered_abs_mean": 0.1134074330329895,
"signal/frontier_coverage_20/group_std_mean": 0.14195845723152162,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024147434905171395,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016217263182625175,
"signal/frontier_coverage_25/centered_abs_mean": 0.1560976982116699,
"signal/frontier_coverage_25/group_std_mean": 0.19557462632656097,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0331905759871006,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002232197020202875,
"signal/frontier_coverage_5/centered_abs_mean": 0.14286354184150696,
"signal/frontier_coverage_5/group_std_mean": 0.18793686628341674,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030401355773210525,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002042948640882969,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33393247723579406,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3993107795715332,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4993874430656433,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033393248543143274,
"step": 800
},
{
"epoch": 1.9215884801439982,
"eval_calibration/aurc": 0.1455259216944456,
"eval_calibration/batch_distribution_entropy": 0.8971357413105366,
"eval_calibration/buffer_distribution_entropy": 0.980327809840766,
"eval_calibration/confidence_entropy": 0.5021766405484772,
"eval_calibration/coverage@0%": 0.2746975806451613,
"eval_calibration/coverage@1%": 0.2746975806451613,
"eval_calibration/coverage@10%": 0.4625336021505377,
"eval_calibration/coverage@15%": 0.494119623655914,
"eval_calibration/coverage@20%": 0.8328293010752689,
"eval_calibration/coverage@25%": 0.9322916666666666,
"eval_calibration/coverage@30%": 1.0,
"eval_calibration/coverage@5%": 0.4102822580645162,
"eval_calibration/ece": 0.24091246639784947,
"eval_calibration/mean_confidence": 0.5817957661290323,
"eval_completions/clipped_ratio": 0.005208333333333352,
"eval_completions/max_length": 3359.1666666666665,
"eval_completions/max_terminated_length": 3359.1666666666665,
"eval_completions/mean_length": 1410.9085286458333,
"eval_completions/mean_terminated_length": 1418.3665771484375,
"eval_completions/min_length": 194.16666666666666,
"eval_completions/min_terminated_length": 507.1666666666667,
"eval_loss": 0.0,
"eval_num_tokens": 2217642443.0,
"eval_reward": 0.920647660891215,
"eval_reward_std": 0.24002850552399954,
"eval_rewards/accuracy_reward": 0.6875,
"eval_rewards/brier_reward": 0.8208476801713308,
"eval_rewards/confidence_uniqueness_reward": 0.8881562054157257,
"eval_rewards/format_reward": 0.9921875099341074,
"eval_rewards/frontier_coverage_0": 0.03746247625288864,
"eval_rewards/frontier_coverage_1": 0.03746247625288864,
"eval_rewards/frontier_coverage_10": 0.04715126069883505,
"eval_rewards/frontier_coverage_15": 0.09141718472043674,
"eval_rewards/frontier_coverage_20": 0.1547790989279747,
"eval_rewards/frontier_coverage_25": 0.23214666545391083,
"eval_rewards/frontier_coverage_5": 0.037501002584273614,
"eval_rewards/frontier_entropy_batch_reward": -0.9921875099341074,
"eval_runtime": 219.7567,
"eval_samples_per_second": 4.55,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4129774272441864,
"eval_signal/accuracy_reward/group_std_mean": 0.46059202154477435,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8701687455177307,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2064887136220932,
"eval_signal/advantage_abs_mean": 0.8672041694323221,
"eval_signal/advantage_pre_scale_abs_mean": 0.20854839434226355,
"eval_signal/advantage_pre_scale_std": 0.23830395440260568,
"eval_signal/advantage_std": 0.9864075283209482,
"eval_signal/brier_reward/centered_abs_mean": 0.16755660126606622,
"eval_signal/brier_reward/group_std_mean": 0.22830546647310257,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07057205463449161,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.016755660995841026,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0519091517974933,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07648126035928726,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021853001477817696,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005190914962440729,
"eval_signal/format_reward/centered_abs_mean": 0.014919704912851254,
"eval_signal/format_reward/group_std_mean": 0.038215355637172856,
"eval_signal/format_reward/group_zero_std_frac": 0.8055555820465088,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.030461806803941727,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.007459852456425627,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.26797403395175934,
"eval_signal/frontier_coverage_0/group_std_mean": 0.37435539563496906,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.016153021560360987,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0038320288294926286,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.26797403395175934,
"eval_signal/frontier_coverage_1/group_std_mean": 0.37435539563496906,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.016153021560360987,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038320288294926286,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.07443711161613464,
"eval_signal/frontier_coverage_10/group_std_mean": 0.09980045631527901,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004489072676127155,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010644506934719782,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.10701891779899597,
"eval_signal/frontier_coverage_15/group_std_mean": 0.13961977263291678,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00645672227256,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015303705004043877,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.18605641275644302,
"eval_signal/frontier_coverage_20/group_std_mean": 0.23238414277633032,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.011226917617022991,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026606065997232995,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2779003183046977,
"eval_signal/frontier_coverage_25/group_std_mean": 0.341300701101621,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01676830028494199,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003973974303031961,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2673551340897878,
"eval_signal/frontier_coverage_5/group_std_mean": 0.3735866844654083,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.016115605210264523,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003823178354650736,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.014919704912851254,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.038215355637172856,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8055555820465088,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.00609236132974426,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0014919705766563613,
"eval_steps_per_second": 0.027,
"step": 800
},
{
"epoch": 1.9215884801439982,
"step": 800,
"train_probe_calibration/aurc": 0.18083066108505683,
"train_probe_calibration/batch_distribution_entropy": 0.9208006951468654,
"train_probe_calibration/buffer_distribution_entropy": 0.9802754681502569,
"train_probe_calibration/confidence_entropy": 0.4683939042130995,
"train_probe_calibration/coverage@0%": 0.203125,
"train_probe_calibration/coverage@1%": 0.203125,
"train_probe_calibration/coverage@10%": 0.421875,
"train_probe_calibration/coverage@15%": 0.609375,
"train_probe_calibration/coverage@20%": 0.8125,
"train_probe_calibration/coverage@25%": 0.90625,
"train_probe_calibration/coverage@30%": 0.953125,
"train_probe_calibration/coverage@5%": 0.22395833333333334,
"train_probe_calibration/ece": 0.23921666666666672,
"train_probe_calibration/mean_confidence": 0.6079770833333333,
"train_probe_completions/clipped_ratio": 0.006770833333333337,
"train_probe_completions/max_length": 3489.3333333333335,
"train_probe_completions/max_terminated_length": 3489.3333333333335,
"train_probe_completions/mean_length": 1386.4324747721355,
"train_probe_completions/mean_terminated_length": 1395.5091552734375,
"train_probe_completions/min_length": 247.16666666666666,
"train_probe_completions/min_terminated_length": 441.6666666666667,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 2217642443.0,
"train_probe_reward": 0.9529338677724203,
"train_probe_reward_std": 0.22208184003829956,
"train_probe_rewards/accuracy_reward": 0.7491319477558136,
"train_probe_rewards/brier_reward": 0.8249579966068268,
"train_probe_rewards/confidence_uniqueness_reward": 0.8884093761444092,
"train_probe_rewards/format_reward": 0.995659718910853,
"train_probe_rewards/frontier_coverage_0": 0.0003041389087835948,
"train_probe_rewards/frontier_coverage_1": 0.0003041389087835948,
"train_probe_rewards/frontier_coverage_10": 0.046201564371585846,
"train_probe_rewards/frontier_coverage_15": 0.10657777761419614,
"train_probe_rewards/frontier_coverage_20": 0.18386120597521463,
"train_probe_rewards/frontier_coverage_25": 0.27541854977607727,
"train_probe_rewards/frontier_coverage_5": 0.00042533256297853467,
"train_probe_rewards/frontier_entropy_batch_reward": -0.995659718910853,
"train_probe_runtime": 199.9791,
"train_probe_samples_per_second": 5.001,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.3640950520833333,
"train_probe_signal/accuracy_reward/group_std_mean": 0.4310727119445801,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8359168668588003,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18204752604166666,
"train_probe_signal/advantage_abs_mean": 0.8151635825634003,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.18291218082110086,
"train_probe_signal/advantage_pre_scale_std": 0.22114630540211996,
"train_probe_signal/advantage_std": 0.9863705039024353,
"train_probe_signal/brier_reward/centered_abs_mean": 0.169136772553126,
"train_probe_signal/brier_reward/group_std_mean": 0.23165656626224518,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07761403918266296,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.01691367772097389,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.049099608014027275,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.07007196421424548,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02254458951453368,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004909960941101114,
"train_probe_signal/format_reward/centered_abs_mean": 0.008409287935743729,
"train_probe_signal/format_reward/group_std_mean": 0.02455231888840596,
"train_probe_signal/format_reward/group_zero_std_frac": 0.8611111243565878,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.018713080634673435,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.004204643967871864,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.24267660826444626,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.362765575448672,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015939356448749702,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003470275589885811,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.24267660826444626,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.362765575448672,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.015939356448749702,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003470275589885811,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0767682616909345,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.10428255423903465,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005042990514387687,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00109778616266946,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.11625955998897552,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.14683445791403452,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0076437525761624174,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016625117083700995,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.19440337270498276,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.2381580794850985,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012779997972150644,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027799681605150304,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.28210918108622235,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.343789463241895,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018544109848638374,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004034161296052237,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.24214978516101837,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.3620627323786418,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.015904737481226523,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034627420051644244,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.008409287935743729,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.02455231888840596,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8611111243565878,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.003742616313199202,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0008409288323794802,
"train_probe_steps_per_second": 0.03
},
{
"calibration/aurc": 0.08876575446621993,
"calibration/batch_distribution_entropy": 0.9423436960308473,
"calibration/buffer_distribution_entropy": 0.9805957757457785,
"calibration/confidence_entropy": 0.4807342807779079,
"calibration/coverage@0%": 0.11765208751376892,
"calibration/coverage@1%": 0.2897543936392867,
"calibration/coverage@10%": 0.6165106442373482,
"calibration/coverage@15%": 0.8177444677953274,
"calibration/coverage@20%": 0.8942868407742388,
"calibration/coverage@25%": 0.9299153828277719,
"calibration/coverage@30%": 0.9597159378289339,
"calibration/coverage@5%": 0.48157154399538965,
"calibration/ece": 0.18224708996069522,
"calibration/mean_confidence": 0.6022935318017679,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004513888888888862,
"completions/max_length": 4028.4,
"completions/max_terminated_length": 4028.4,
"completions/mean_length": 1402.483349609375,
"completions/mean_terminated_length": 1408.866552734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 425.0,
"epoch": 1.9335883301458732,
"grad_norm": 0.002710554050281644,
"learning_rate": 1.4122596153846154e-06,
"loss": -0.0106,
"num_tokens": 2236900619.0,
"reward": 1.0090242743492126,
"reward_std": 0.10053354352712632,
"rewards/accuracy_reward": 0.7064236044883728,
"rewards/brier_reward": 0.8399913668632507,
"rewards/confidence_uniqueness_reward": 0.9446008086204529,
"rewards/format_reward": 0.9954861164093017,
"rewards/frontier_coverage_0": 0.04834661977365613,
"rewards/frontier_coverage_1": 0.04834661977365613,
"rewards/frontier_coverage_10": 0.056374243646860125,
"rewards/frontier_coverage_15": 0.1052817702293396,
"rewards/frontier_coverage_20": 0.1751980185508728,
"rewards/frontier_coverage_25": 0.2610403925180435,
"rewards/frontier_coverage_5": 0.04840220175683498,
"rewards/frontier_entropy_batch_reward": -0.31014604568481446,
"signal/accuracy_reward/centered_abs_mean": 0.10692274272441864,
"signal/accuracy_reward/group_std_mean": 0.14614808857440947,
"signal/accuracy_reward/group_zero_std_frac": 0.5555555641651153,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9182430386543274,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05346137136220932,
"signal/advantage_abs_mean": 0.7531801104545593,
"signal/advantage_pre_scale_abs_mean": 0.07416855543851852,
"signal/advantage_pre_scale_std": 0.12756477743387223,
"signal/advantage_std": 0.9827540397644043,
"signal/brier_reward/centered_abs_mean": 0.10358149409294129,
"signal/brier_reward/group_std_mean": 0.13555350452661513,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1797472804784775,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01035814955830574,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02066163383424282,
"signal/confidence_uniqueness_reward/group_std_mean": 0.033146093413233754,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0357735026627779,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020661634393036366,
"signal/format_reward/centered_abs_mean": 0.008192274253815413,
"signal/format_reward/group_std_mean": 0.017970719560980796,
"signal/format_reward/group_zero_std_frac": 0.9166666626930237,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06989422589540481,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004096137126907706,
"signal/frontier_coverage_0/centered_abs_mean": 0.14836236834526062,
"signal/frontier_coverage_0/group_std_mean": 0.19158115983009338,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036716148257255554,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021215818589553235,
"signal/frontier_coverage_1/centered_abs_mean": 0.14836236834526062,
"signal/frontier_coverage_1/group_std_mean": 0.19158115983009338,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036716148257255554,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021215818589553235,
"signal/frontier_coverage_10/centered_abs_mean": 0.05951479524374008,
"signal/frontier_coverage_10/group_std_mean": 0.07481328845024109,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014785249903798103,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008510615676641464,
"signal/frontier_coverage_15/centered_abs_mean": 0.07461834698915482,
"signal/frontier_coverage_15/group_std_mean": 0.09241391271352768,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01858535371720791,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010670423740521072,
"signal/frontier_coverage_20/centered_abs_mean": 0.10110130459070206,
"signal/frontier_coverage_20/group_std_mean": 0.12569495439529418,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025172940641641616,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014457486337050795,
"signal/frontier_coverage_25/centered_abs_mean": 0.13566771894693375,
"signal/frontier_coverage_25/group_std_mean": 0.16943861842155455,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.033737773075699806,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019400483928620816,
"signal/frontier_coverage_5/centered_abs_mean": 0.14806557297706605,
"signal/frontier_coverage_5/group_std_mean": 0.19121136963367463,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.036642659455537796,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021173376822844147,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3130863606929779,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3798399746417999,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5450647294521331,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03130863644182682,
"step": 805
},
{
"calibration/aurc": 0.1471249527656881,
"calibration/batch_distribution_entropy": 0.9672537796661741,
"calibration/buffer_distribution_entropy": 0.979552963929845,
"calibration/confidence_entropy": 0.49464106724349055,
"calibration/coverage@0%": 0.029417291554110984,
"calibration/coverage@1%": 0.06362781786990046,
"calibration/coverage@10%": 0.36135376187304225,
"calibration/coverage@15%": 0.5955494493668724,
"calibration/coverage@20%": 0.7921494204888049,
"calibration/coverage@25%": 0.8716848264548156,
"calibration/coverage@30%": 0.9671018276762402,
"calibration/coverage@5%": 0.10620979637637609,
"calibration/ece": 0.16179601206450783,
"calibration/mean_confidence": 0.5837775350143656,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.005034722222222232,
"completions/max_length": 4013.2,
"completions/max_terminated_length": 4013.2,
"completions/mean_length": 1473.1595703125,
"completions/mean_terminated_length": 1480.6045166015624,
"completions/min_length": 0.0,
"completions/min_terminated_length": 476.6,
"epoch": 1.9455881801477481,
"grad_norm": 0.0026609154883772135,
"learning_rate": 1.3822115384615387e-06,
"loss": -0.0142,
"num_tokens": 2256970521.0,
"reward": 1.0169323682785034,
"reward_std": 0.11198469400405883,
"rewards/accuracy_reward": 0.7326388955116272,
"rewards/brier_reward": 0.8031673669815064,
"rewards/confidence_uniqueness_reward": 0.9455313444137573,
"rewards/format_reward": 0.9949652791023255,
"rewards/frontier_coverage_0": -0.009512295946478844,
"rewards/frontier_coverage_1": -0.009512295946478844,
"rewards/frontier_coverage_10": 0.03974997103214264,
"rewards/frontier_coverage_15": 0.09132075309753418,
"rewards/frontier_coverage_20": 0.15879679769277572,
"rewards/frontier_coverage_25": 0.2403422027826309,
"rewards/frontier_coverage_5": -0.009342345595359802,
"rewards/frontier_entropy_batch_reward": -0.28915963172912595,
"signal/accuracy_reward/centered_abs_mean": 0.1264214411377907,
"signal/accuracy_reward/group_std_mean": 0.17384180426597595,
"signal/accuracy_reward/group_zero_std_frac": 0.4777777850627899,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9332287669181824,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06321072056889535,
"signal/advantage_abs_mean": 0.7431563854217529,
"signal/advantage_pre_scale_abs_mean": 0.08071474879980087,
"signal/advantage_pre_scale_std": 0.13466430604457855,
"signal/advantage_std": 0.983002245426178,
"signal/brier_reward/centered_abs_mean": 0.11788292080163956,
"signal/brier_reward/group_std_mean": 0.15307863652706147,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17422791421413422,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011788292042911052,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021130212768912315,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03701507076621056,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03132249191403389,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00211302125826478,
"signal/format_reward/centered_abs_mean": 0.009461805690079927,
"signal/format_reward/group_std_mean": 0.02294406220316887,
"signal/format_reward/group_zero_std_frac": 0.8861111164093017,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0699712760746479,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004730902845039964,
"signal/frontier_coverage_0/centered_abs_mean": 0.17159743010997772,
"signal/frontier_coverage_0/group_std_mean": 0.2192448854446411,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036260566860437396,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024538431782275437,
"signal/frontier_coverage_1/centered_abs_mean": 0.17159743010997772,
"signal/frontier_coverage_1/group_std_mean": 0.2192448854446411,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036260566860437396,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024538431782275437,
"signal/frontier_coverage_10/centered_abs_mean": 0.06220594048500061,
"signal/frontier_coverage_10/group_std_mean": 0.07851073145866394,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013184322603046894,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008895449223928154,
"signal/frontier_coverage_15/centered_abs_mean": 0.07340938150882721,
"signal/frontier_coverage_15/group_std_mean": 0.0915742427110672,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015591609664261341,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010497541399672628,
"signal/frontier_coverage_20/centered_abs_mean": 0.10003067702054977,
"signal/frontier_coverage_20/group_std_mean": 0.12545598596334456,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02125253602862358,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014304386451840401,
"signal/frontier_coverage_25/centered_abs_mean": 0.1350281298160553,
"signal/frontier_coverage_25/group_std_mean": 0.17083185017108918,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02868236191570759,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019309022929519415,
"signal/frontier_coverage_5/centered_abs_mean": 0.17116305530071257,
"signal/frontier_coverage_5/group_std_mean": 0.21869678497314454,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03616959452629089,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024476317223161457,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31781532168388366,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38577706813812257,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4724361836910248,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0317815326154232,
"step": 810
},
{
"calibration/aurc": 0.07738243799150339,
"calibration/batch_distribution_entropy": 0.9603774756717269,
"calibration/buffer_distribution_entropy": 0.9790119971436921,
"calibration/confidence_entropy": 0.47555495386025026,
"calibration/coverage@0%": 0.0898050077121467,
"calibration/coverage@1%": 0.24541858473564537,
"calibration/coverage@10%": 0.6958624603570598,
"calibration/coverage@15%": 0.8912864988448866,
"calibration/coverage@20%": 0.9743838256804231,
"calibration/coverage@25%": 0.9994778067885118,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.5200500577556628,
"calibration/ece": 0.21906389852930958,
"calibration/mean_confidence": 0.595564159124398,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00425347222222221,
"completions/max_length": 3920.6,
"completions/max_terminated_length": 3920.6,
"completions/mean_length": 1437.7090576171875,
"completions/mean_terminated_length": 1443.9518798828126,
"completions/min_length": 0.0,
"completions/min_terminated_length": 421.2,
"epoch": 1.9575880301496231,
"grad_norm": 0.0027806926518678665,
"learning_rate": 1.3521634615384617e-06,
"loss": -0.0134,
"num_tokens": 2276629761.0,
"reward": 1.0065173625946044,
"reward_std": 0.10549866706132889,
"rewards/accuracy_reward": 0.7015625,
"rewards/brier_reward": 0.809320867061615,
"rewards/confidence_uniqueness_reward": 0.9481490731239319,
"rewards/format_reward": 0.9957465171813965,
"rewards/frontier_coverage_0": 0.019368353858590127,
"rewards/frontier_coverage_1": 0.019368353858590127,
"rewards/frontier_coverage_10": 0.04486367180943489,
"rewards/frontier_coverage_15": 0.08716509938240051,
"rewards/frontier_coverage_20": 0.1480014741420746,
"rewards/frontier_coverage_25": 0.2240679919719696,
"rewards/frontier_coverage_5": 0.019489490240812302,
"rewards/frontier_entropy_batch_reward": -0.2592541307210922,
"signal/accuracy_reward/centered_abs_mean": 0.11748046725988388,
"signal/accuracy_reward/group_std_mean": 0.1602412313222885,
"signal/accuracy_reward/group_zero_std_frac": 0.5305555641651154,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9277384519577027,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05874023362994194,
"signal/advantage_abs_mean": 0.7442155957221985,
"signal/advantage_pre_scale_abs_mean": 0.07687741965055465,
"signal/advantage_pre_scale_std": 0.12851224541664125,
"signal/advantage_std": 0.982895040512085,
"signal/brier_reward/centered_abs_mean": 0.10857034474611282,
"signal/brier_reward/group_std_mean": 0.1429567039012909,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17236358523368836,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010857034847140313,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01928409282118082,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03310770466923714,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030435840785503387,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019284092588350177,
"signal/format_reward/centered_abs_mean": 0.0077636716421693565,
"signal/format_reward/group_std_mean": 0.0192112909629941,
"signal/format_reward/group_zero_std_frac": 0.9,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06007810868322849,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0038818358210846783,
"signal/frontier_coverage_0/centered_abs_mean": 0.16339569687843322,
"signal/frontier_coverage_0/group_std_mean": 0.21440580487251282,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03715735524892807,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023365584667772053,
"signal/frontier_coverage_1/centered_abs_mean": 0.16339569687843322,
"signal/frontier_coverage_1/group_std_mean": 0.21440580487251282,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03715735524892807,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023365584667772053,
"signal/frontier_coverage_10/centered_abs_mean": 0.058762216567993165,
"signal/frontier_coverage_10/group_std_mean": 0.07503211051225663,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013392175361514092,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008402997045777738,
"signal/frontier_coverage_15/centered_abs_mean": 0.06615000814199448,
"signal/frontier_coverage_15/group_std_mean": 0.0824548989534378,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015085921250283718,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009459450608119369,
"signal/frontier_coverage_20/centered_abs_mean": 0.08922545164823532,
"signal/frontier_coverage_20/group_std_mean": 0.11132535338401794,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020322853699326515,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012759239645674825,
"signal/frontier_coverage_25/centered_abs_mean": 0.1211626797914505,
"signal/frontier_coverage_25/group_std_mean": 0.152143993973732,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027561284601688385,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017326262313872576,
"signal/frontier_coverage_5/centered_abs_mean": 0.16293131411075593,
"signal/frontier_coverage_5/group_std_mean": 0.21380787193775178,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0370516188442707,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002329917624592781,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31487070918083193,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3838111996650696,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5012700438499451,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03148707151412964,
"step": 815
},
{
"calibration/aurc": 0.1261956584549119,
"calibration/batch_distribution_entropy": 0.9483172114138678,
"calibration/buffer_distribution_entropy": 0.977788883764146,
"calibration/confidence_entropy": 0.4780430829054336,
"calibration/coverage@0%": 0.20325120097585714,
"calibration/coverage@1%": 0.21160629235966916,
"calibration/coverage@10%": 0.567608430473606,
"calibration/coverage@15%": 0.6381378578085702,
"calibration/coverage@20%": 0.6888456583266518,
"calibration/coverage@25%": 0.7916421888598781,
"calibration/coverage@30%": 0.8010416666666667,
"calibration/coverage@5%": 0.48046569295107683,
"calibration/ece": 0.19220841936618543,
"calibration/mean_confidence": 0.602271614621774,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0057291666666666515,
"completions/max_length": 3918.0,
"completions/max_terminated_length": 3918.0,
"completions/mean_length": 1418.9080810546875,
"completions/mean_terminated_length": 1427.1089599609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 452.2,
"epoch": 1.969587880151498,
"grad_norm": 0.0029568555764853954,
"learning_rate": 1.3221153846153848e-06,
"loss": -0.019,
"num_tokens": 2296059358.0,
"reward": 1.01281396150589,
"reward_std": 0.10452383458614349,
"rewards/accuracy_reward": 0.7182291626930237,
"rewards/brier_reward": 0.8234552621841431,
"rewards/confidence_uniqueness_reward": 0.9444832563400268,
"rewards/format_reward": 0.9942708373069763,
"rewards/frontier_coverage_0": 0.025464657321572305,
"rewards/frontier_coverage_1": 0.025464657321572305,
"rewards/frontier_coverage_10": 0.05050650909543038,
"rewards/frontier_coverage_15": 0.10313679426908492,
"rewards/frontier_coverage_20": 0.17374457716941832,
"rewards/frontier_coverage_25": 0.2584977805614471,
"rewards/frontier_coverage_5": 0.025534218549728392,
"rewards/frontier_entropy_batch_reward": -0.29701498746871946,
"signal/accuracy_reward/centered_abs_mean": 0.11195746660232545,
"signal/accuracy_reward/group_std_mean": 0.1531495451927185,
"signal/accuracy_reward/group_zero_std_frac": 0.5388888895511628,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9098389506340027,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05597873330116272,
"signal/advantage_abs_mean": 0.7542153596878052,
"signal/advantage_pre_scale_abs_mean": 0.07647128999233246,
"signal/advantage_pre_scale_std": 0.12971136420965196,
"signal/advantage_std": 0.982850193977356,
"signal/brier_reward/centered_abs_mean": 0.10961567163467408,
"signal/brier_reward/group_std_mean": 0.1442680150270462,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.178808531165123,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010961567610502243,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022698301821947098,
"signal/confidence_uniqueness_reward/group_std_mean": 0.037622253969311716,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03659343495965004,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022698301589116456,
"signal/format_reward/centered_abs_mean": 0.010427517350763083,
"signal/format_reward/group_std_mean": 0.022845717146992683,
"signal/format_reward/group_zero_std_frac": 0.8944444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08172076642513275,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005213758675381541,
"signal/frontier_coverage_0/centered_abs_mean": 0.15467220842838286,
"signal/frontier_coverage_0/group_std_mean": 0.20240817666053773,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036262784898281095,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022118125576525926,
"signal/frontier_coverage_1/centered_abs_mean": 0.15467220842838286,
"signal/frontier_coverage_1/group_std_mean": 0.20240817666053773,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036262784898281095,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022118125576525926,
"signal/frontier_coverage_10/centered_abs_mean": 0.06070537865161896,
"signal/frontier_coverage_10/group_std_mean": 0.07693096548318863,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014242619462311267,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008680869126692414,
"signal/frontier_coverage_15/centered_abs_mean": 0.07251727730035781,
"signal/frontier_coverage_15/group_std_mean": 0.08975716978311539,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01698379050940275,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010369970346800982,
"signal/frontier_coverage_20/centered_abs_mean": 0.09770961105823517,
"signal/frontier_coverage_20/group_std_mean": 0.12141573280096055,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022855057194828988,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013972474029287697,
"signal/frontier_coverage_25/centered_abs_mean": 0.13089303821325302,
"signal/frontier_coverage_25/group_std_mean": 0.1633365660905838,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0305940430611372,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018717704573646189,
"signal/frontier_coverage_5/centered_abs_mean": 0.15423052310943602,
"signal/frontier_coverage_5/group_std_mean": 0.20185908377170564,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03615873046219349,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002205496421083808,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33060168027877807,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.396547919511795,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5411663591861725,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03306016884744167,
"step": 820
},
{
"calibration/aurc": 0.12442483011569842,
"calibration/batch_distribution_entropy": 0.9488465592185378,
"calibration/buffer_distribution_entropy": 0.9773383751335333,
"calibration/confidence_entropy": 0.4841768576504233,
"calibration/coverage@0%": 0.1472027306353351,
"calibration/coverage@1%": 0.15190246953872935,
"calibration/coverage@10%": 0.45165361183637937,
"calibration/coverage@15%": 0.7130344321148825,
"calibration/coverage@20%": 0.8033126631853786,
"calibration/coverage@25%": 0.8612108355091384,
"calibration/coverage@30%": 0.895097639251523,
"calibration/coverage@5%": 0.4119859660574412,
"calibration/ece": 0.2017781809453873,
"calibration/mean_confidence": 0.5740669150620106,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003211805555555536,
"completions/max_length": 3631.6,
"completions/max_terminated_length": 3631.6,
"completions/mean_length": 1321.271875,
"completions/mean_terminated_length": 1325.5808349609374,
"completions/min_length": 0.0,
"completions/min_terminated_length": 385.4,
"epoch": 1.981587730153373,
"grad_norm": 0.0029349022079259157,
"learning_rate": 1.292067307692308e-06,
"loss": -0.0096,
"num_tokens": 2314368794.0,
"reward": 1.0168671369552613,
"reward_std": 0.10342361778020859,
"rewards/accuracy_reward": 0.7309895634651185,
"rewards/brier_reward": 0.8359049558639526,
"rewards/confidence_uniqueness_reward": 0.9436007738113403,
"rewards/format_reward": 0.9967881798744201,
"rewards/frontier_coverage_0": 0.023777881916612387,
"rewards/frontier_coverage_1": 0.023777881916612387,
"rewards/frontier_coverage_10": 0.052507009357213974,
"rewards/frontier_coverage_15": 0.10996098518371582,
"rewards/frontier_coverage_20": 0.18512236475944518,
"rewards/frontier_coverage_25": 0.27436395883560183,
"rewards/frontier_coverage_5": 0.023881060443818568,
"rewards/frontier_entropy_batch_reward": -0.3488785922527313,
"signal/accuracy_reward/centered_abs_mean": 0.11349283754825593,
"signal/accuracy_reward/group_std_mean": 0.15641495883464812,
"signal/accuracy_reward/group_zero_std_frac": 0.5333333313465118,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9231669783592225,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05674641877412796,
"signal/advantage_abs_mean": 0.7529475927352905,
"signal/advantage_pre_scale_abs_mean": 0.07502718269824982,
"signal/advantage_pre_scale_std": 0.12588909417390823,
"signal/advantage_std": 0.9828490853309632,
"signal/brier_reward/centered_abs_mean": 0.10705066174268722,
"signal/brier_reward/group_std_mean": 0.1404803767800331,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17473995983600615,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010705066099762916,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020141271874308586,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03240118809044361,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03299994915723801,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020141272805631162,
"signal/format_reward/centered_abs_mean": 0.006114366184920073,
"signal/format_reward/group_std_mean": 0.015425614640116691,
"signal/format_reward/group_zero_std_frac": 0.9222222447395325,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.04925214573740959,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0030571830924600364,
"signal/frontier_coverage_0/centered_abs_mean": 0.15044747292995453,
"signal/frontier_coverage_0/group_std_mean": 0.19402441680431365,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035044976323843,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021513988031074405,
"signal/frontier_coverage_1/centered_abs_mean": 0.15044747292995453,
"signal/frontier_coverage_1/group_std_mean": 0.19402441680431365,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035044976323843,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021513988031074405,
"signal/frontier_coverage_10/centered_abs_mean": 0.05916027277708054,
"signal/frontier_coverage_10/group_std_mean": 0.07418683767318726,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013846796937286854,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008459919597953558,
"signal/frontier_coverage_15/centered_abs_mean": 0.0763422504067421,
"signal/frontier_coverage_15/group_std_mean": 0.09492785483598709,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017957745492458342,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010916941799223423,
"signal/frontier_coverage_20/centered_abs_mean": 0.10528118759393693,
"signal/frontier_coverage_20/group_std_mean": 0.13137973099946976,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02475803196430206,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015055209165439009,
"signal/frontier_coverage_25/centered_abs_mean": 0.1406207025051117,
"signal/frontier_coverage_25/group_std_mean": 0.1765914499759674,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03303173556923866,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002010876010172069,
"signal/frontier_coverage_5/centered_abs_mean": 0.14978999197483062,
"signal/frontier_coverage_5/group_std_mean": 0.1932007133960724,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03489072918891907,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002141996775753796,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3424068748950958,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4095862090587616,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.562820303440094,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034240689128637314,
"step": 825
},
{
"calibration/aurc": 0.09355573925524277,
"calibration/batch_distribution_entropy": 0.9612319655245531,
"calibration/buffer_distribution_entropy": 0.9780537442600634,
"calibration/confidence_entropy": 0.4896003885731581,
"calibration/coverage@0%": 0.04173466057441253,
"calibration/coverage@1%": 0.04173466057441253,
"calibration/coverage@10%": 0.6361591601392516,
"calibration/coverage@15%": 0.790623640121845,
"calibration/coverage@20%": 0.9227154046997388,
"calibration/coverage@25%": 0.9744125326370756,
"calibration/coverage@30%": 0.993733681462141,
"calibration/coverage@5%": 0.37932985204525677,
"calibration/ece": 0.2052610825050439,
"calibration/mean_confidence": 0.5847677710929464,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00269097222222221,
"completions/max_length": 3762.2,
"completions/max_terminated_length": 3762.2,
"completions/mean_length": 1352.4456787109375,
"completions/mean_terminated_length": 1356.0873046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 341.0,
"epoch": 1.993587580155248,
"grad_norm": 0.0030781906098127365,
"learning_rate": 1.2620192307692309e-06,
"loss": -0.0042,
"num_tokens": 2333073032.0,
"reward": 1.0112749218940735,
"reward_std": 0.10696900635957718,
"rewards/accuracy_reward": 0.7067708253860474,
"rewards/brier_reward": 0.8296258449554443,
"rewards/confidence_uniqueness_reward": 0.9480892419815063,
"rewards/format_reward": 0.9972222208976745,
"rewards/frontier_coverage_0": 0.0333960821852088,
"rewards/frontier_coverage_1": 0.0333960821852088,
"rewards/frontier_coverage_10": 0.0535750538110733,
"rewards/frontier_coverage_15": 0.1058346152305603,
"rewards/frontier_coverage_20": 0.17506144642829896,
"rewards/frontier_coverage_25": 0.25745048820972444,
"rewards/frontier_coverage_5": 0.03353348933160305,
"rewards/frontier_entropy_batch_reward": -0.28392277359962464,
"signal/accuracy_reward/centered_abs_mean": 0.1243598073720932,
"signal/accuracy_reward/group_std_mean": 0.16650678515434264,
"signal/accuracy_reward/group_zero_std_frac": 0.5083333492279053,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9852775573730469,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0621799036860466,
"signal/advantage_abs_mean": 0.7518176913261414,
"signal/advantage_pre_scale_abs_mean": 0.08001424670219422,
"signal/advantage_pre_scale_std": 0.13119888603687285,
"signal/advantage_std": 0.9828913331031799,
"signal/brier_reward/centered_abs_mean": 0.10915734171867371,
"signal/brier_reward/group_std_mean": 0.14323717653751372,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17464982271194457,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010915734060108661,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.018232964724302293,
"signal/confidence_uniqueness_reward/group_std_mean": 0.029694054275751114,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02886694110929966,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018232964677736164,
"signal/format_reward/centered_abs_mean": 0.00530598945915699,
"signal/format_reward/group_std_mean": 0.013867205008864403,
"signal/format_reward/group_zero_std_frac": 0.9277778029441833,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.041387615352869035,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.002652994729578495,
"signal/frontier_coverage_0/centered_abs_mean": 0.15533825755119324,
"signal/frontier_coverage_0/group_std_mean": 0.20087738037109376,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03541974872350693,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022213370073586704,
"signal/frontier_coverage_1/centered_abs_mean": 0.15533825755119324,
"signal/frontier_coverage_1/group_std_mean": 0.20087738037109376,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03541974872350693,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022213370073586704,
"signal/frontier_coverage_10/centered_abs_mean": 0.06064486652612686,
"signal/frontier_coverage_10/group_std_mean": 0.07602840662002563,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01384783312678337,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008672215277329087,
"signal/frontier_coverage_15/centered_abs_mean": 0.07716170549392701,
"signal/frontier_coverage_15/group_std_mean": 0.09574876427650451,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01760086081922054,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011034123599529266,
"signal/frontier_coverage_20/centered_abs_mean": 0.10649595856666565,
"signal/frontier_coverage_20/group_std_mean": 0.133084973692894,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024280770123004912,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015228921081870794,
"signal/frontier_coverage_25/centered_abs_mean": 0.14340307414531708,
"signal/frontier_coverage_25/group_std_mean": 0.18047136664390565,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.032692290097475055,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002050663949921727,
"signal/frontier_coverage_5/centered_abs_mean": 0.15442144870758057,
"signal/frontier_coverage_5/group_std_mean": 0.19973163902759553,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03521168828010559,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022082267329096793,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32058742046356203,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3872204661369324,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5107461273670196,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032058742642402646,
"step": 830
},
{
"calibration/aurc": 0.10569611704478651,
"calibration/batch_distribution_entropy": 0.9533977736379946,
"calibration/buffer_distribution_entropy": 0.978567734903438,
"calibration/confidence_entropy": 0.47113983265831044,
"calibration/coverage@0%": 0.12089054077299177,
"calibration/coverage@1%": 0.14536970743965844,
"calibration/coverage@10%": 0.5777594846873447,
"calibration/coverage@15%": 0.7742097968754984,
"calibration/coverage@20%": 0.868240380321609,
"calibration/coverage@25%": 0.9299003458504778,
"calibration/coverage@30%": 0.9843095976925074,
"calibration/coverage@5%": 0.36547769173285216,
"calibration/ece": 0.16692479621857995,
"calibration/mean_confidence": 0.6082626008419189,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0028645833333333483,
"completions/max_length": 3270.6,
"completions/max_terminated_length": 3270.6,
"completions/mean_length": 1294.222314453125,
"completions/mean_terminated_length": 1298.0444580078124,
"completions/min_length": 136.8,
"completions/min_terminated_length": 464.8,
"epoch": 2.007199910001125,
"grad_norm": 0.002975533716380596,
"learning_rate": 1.231971153846154e-06,
"loss": -0.0119,
"num_tokens": 2351306124.0,
"reward": 1.0097980260849,
"reward_std": 0.10146247148513794,
"rewards/accuracy_reward": 0.7085069417953491,
"rewards/brier_reward": 0.823908519744873,
"rewards/confidence_uniqueness_reward": 0.9468066334724426,
"rewards/format_reward": 0.9962673664093018,
"rewards/frontier_coverage_0": 0.02436054665595293,
"rewards/frontier_coverage_1": 0.02436054665595293,
"rewards/frontier_coverage_10": 0.049334879219532016,
"rewards/frontier_coverage_15": 0.10209986120462418,
"rewards/frontier_coverage_20": 0.17016193866729737,
"rewards/frontier_coverage_25": 0.25037443935871123,
"rewards/frontier_coverage_5": 0.02455103537067771,
"rewards/frontier_entropy_batch_reward": -0.288876348733902,
"signal/accuracy_reward/centered_abs_mean": 0.10684678852558135,
"signal/accuracy_reward/group_std_mean": 0.14497185051441192,
"signal/accuracy_reward/group_zero_std_frac": 0.5666666686534881,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8739351272583008,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05342339426279068,
"signal/advantage_abs_mean": 0.7597023010253906,
"signal/advantage_pre_scale_abs_mean": 0.07467978298664094,
"signal/advantage_pre_scale_std": 0.12498285323381424,
"signal/advantage_std": 0.9828245639801025,
"signal/brier_reward/centered_abs_mean": 0.11109703779220581,
"signal/brier_reward/group_std_mean": 0.14594402611255647,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18416101932525636,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011109703965485097,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019707629829645155,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03326699696481228,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03271297216415405,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001970763085409999,
"signal/format_reward/centered_abs_mean": 0.007090928731486201,
"signal/format_reward/group_std_mean": 0.01796768419444561,
"signal/format_reward/group_zero_std_frac": 0.9083333492279053,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.057955706119537355,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0035454643657431006,
"signal/frontier_coverage_0/centered_abs_mean": 0.14938410818576814,
"signal/frontier_coverage_0/group_std_mean": 0.19320706129074097,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035366549342870715,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002136192889884114,
"signal/frontier_coverage_1/centered_abs_mean": 0.14938410818576814,
"signal/frontier_coverage_1/group_std_mean": 0.19320706129074097,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035366549342870715,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002136192889884114,
"signal/frontier_coverage_10/centered_abs_mean": 0.059661376476287845,
"signal/frontier_coverage_10/group_std_mean": 0.07549520283937454,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014192548766732215,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008531576604582369,
"signal/frontier_coverage_15/centered_abs_mean": 0.07816312313079835,
"signal/frontier_coverage_15/group_std_mean": 0.09718612283468246,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018611904233694077,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011177326552569867,
"signal/frontier_coverage_20/centered_abs_mean": 0.10804884135723114,
"signal/frontier_coverage_20/group_std_mean": 0.1343923181295395,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025684969499707222,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015450984239578248,
"signal/frontier_coverage_25/centered_abs_mean": 0.14375920593738556,
"signal/frontier_coverage_25/group_std_mean": 0.17935467660427093,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03412468209862709,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002055756654590368,
"signal/frontier_coverage_5/centered_abs_mean": 0.14854101240634918,
"signal/frontier_coverage_5/group_std_mean": 0.19215008020401,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035167403519153595,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021241364534944295,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3249393939971924,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39019683599472044,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5427081823348999,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03249393925070763,
"step": 835
},
{
"calibration/aurc": 0.062211177501246516,
"calibration/batch_distribution_entropy": 0.9600943074730827,
"calibration/buffer_distribution_entropy": 0.977535601973992,
"calibration/confidence_entropy": 0.47148411925605904,
"calibration/coverage@0%": 0.14299342105263158,
"calibration/coverage@1%": 0.4756743421052632,
"calibration/coverage@10%": 0.7766829705152934,
"calibration/coverage@15%": 0.8547742433636447,
"calibration/coverage@20%": 0.9250560588316341,
"calibration/coverage@25%": 0.9696335078534031,
"calibration/coverage@30%": 0.9837696335078533,
"calibration/coverage@5%": 0.6397520552034536,
"calibration/ece": 0.2028160098956898,
"calibration/mean_confidence": 0.5834283529912395,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00616319444444442,
"completions/max_length": 3825.8,
"completions/max_terminated_length": 3825.8,
"completions/mean_length": 1376.5011474609375,
"completions/mean_terminated_length": 1385.176806640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 430.6,
"epoch": 2.019199760003,
"grad_norm": 0.002842454006895423,
"learning_rate": 1.201923076923077e-06,
"loss": -0.0196,
"num_tokens": 2370261017.0,
"reward": 1.0190519213676452,
"reward_std": 0.11003706753253936,
"rewards/accuracy_reward": 0.7313368082046509,
"rewards/brier_reward": 0.8243500471115113,
"rewards/confidence_uniqueness_reward": 0.9446738362312317,
"rewards/format_reward": 0.9938368082046509,
"rewards/frontier_coverage_0": 0.01120219323784113,
"rewards/frontier_coverage_1": 0.01120219323784113,
"rewards/frontier_coverage_10": 0.04924294427037239,
"rewards/frontier_coverage_15": 0.10741689503192901,
"rewards/frontier_coverage_20": 0.18002953827381135,
"rewards/frontier_coverage_25": 0.2659532427787781,
"rewards/frontier_coverage_5": 0.011461955774575473,
"rewards/frontier_entropy_batch_reward": -0.29539363384246825,
"signal/accuracy_reward/centered_abs_mean": 0.114794921875,
"signal/accuracy_reward/group_std_mean": 0.15858063697814942,
"signal/accuracy_reward/group_zero_std_frac": 0.5194444596767426,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9148180603981018,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0573974609375,
"signal/advantage_abs_mean": 0.7386746406555176,
"signal/advantage_pre_scale_abs_mean": 0.07803614884614944,
"signal/advantage_pre_scale_std": 0.13629978895187378,
"signal/advantage_std": 0.9828796863555909,
"signal/brier_reward/centered_abs_mean": 0.10765846222639083,
"signal/brier_reward/group_std_mean": 0.1435864210128784,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17238323390483856,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010765845887362957,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0232498437166214,
"signal/confidence_uniqueness_reward/group_std_mean": 0.041168955340981486,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03694990836083889,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002324984478764236,
"signal/format_reward/centered_abs_mean": 0.011539713572710752,
"signal/format_reward/group_std_mean": 0.027077178843319415,
"signal/format_reward/group_zero_std_frac": 0.869444465637207,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09020622819662094,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005769856786355376,
"signal/frontier_coverage_0/centered_abs_mean": 0.1507784366607666,
"signal/frontier_coverage_0/group_std_mean": 0.1961473524570465,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03440270908176899,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002156131574884057,
"signal/frontier_coverage_1/centered_abs_mean": 0.1507784366607666,
"signal/frontier_coverage_1/group_std_mean": 0.1961473524570465,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03440270908176899,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002156131574884057,
"signal/frontier_coverage_10/centered_abs_mean": 0.05933835953474045,
"signal/frontier_coverage_10/group_std_mean": 0.07431373596191407,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01358701903373003,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008485385100357234,
"signal/frontier_coverage_15/centered_abs_mean": 0.07423074394464493,
"signal/frontier_coverage_15/group_std_mean": 0.09245937913656235,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017081401497125625,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010614996077492832,
"signal/frontier_coverage_20/centered_abs_mean": 0.10113731771707535,
"signal/frontier_coverage_20/group_std_mean": 0.12667881697416306,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023303528502583503,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014462636318057775,
"signal/frontier_coverage_25/centered_abs_mean": 0.13548611998558044,
"signal/frontier_coverage_25/group_std_mean": 0.1705509215593338,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.031227792799472808,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019374514231458306,
"signal/frontier_coverage_5/centered_abs_mean": 0.15002332031726837,
"signal/frontier_coverage_5/group_std_mean": 0.19519493579864503,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.034229816496372224,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002145333564840257,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33091223835945127,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39722990393638613,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5324559807777405,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03309122622013092,
"step": 840
},
{
"calibration/aurc": 0.06684569611947105,
"calibration/batch_distribution_entropy": 0.9320840327223838,
"calibration/buffer_distribution_entropy": 0.9765756609311985,
"calibration/confidence_entropy": 0.519962933260064,
"calibration/coverage@0%": 0.3502802096278069,
"calibration/coverage@1%": 0.35970463864047325,
"calibration/coverage@10%": 0.725888842557783,
"calibration/coverage@15%": 0.8328692453679034,
"calibration/coverage@20%": 0.9629242819843341,
"calibration/coverage@25%": 0.9900783289817232,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.5334907507356006,
"calibration/ece": 0.21881182634444443,
"calibration/mean_confidence": 0.6166823207944025,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006944444444444464,
"completions/max_length": 3702.8,
"completions/max_terminated_length": 3702.8,
"completions/mean_length": 1365.2091064453125,
"completions/mean_terminated_length": 1374.738330078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 376.0,
"epoch": 2.031199610004875,
"grad_norm": 0.0029446668922901154,
"learning_rate": 1.1718750000000001e-06,
"loss": -0.0191,
"num_tokens": 2389087778.0,
"reward": 1.0194933891296387,
"reward_std": 0.10787245631217957,
"rewards/accuracy_reward": 0.7318576455116272,
"rewards/brier_reward": 0.8298116207122803,
"rewards/confidence_uniqueness_reward": 0.9431999564170838,
"rewards/format_reward": 0.9930555582046509,
"rewards/frontier_coverage_0": 0.01630272523034364,
"rewards/frontier_coverage_1": 0.01630272523034364,
"rewards/frontier_coverage_10": 0.05233140736818313,
"rewards/frontier_coverage_15": 0.10671553313732147,
"rewards/frontier_coverage_20": 0.17818537950515748,
"rewards/frontier_coverage_25": 0.26444960534572604,
"rewards/frontier_coverage_5": 0.016577059985138476,
"rewards/frontier_entropy_batch_reward": -0.2957174897193909,
"signal/accuracy_reward/centered_abs_mean": 0.11165906935930252,
"signal/accuracy_reward/group_std_mean": 0.1545466125011444,
"signal/accuracy_reward/group_zero_std_frac": 0.5277777850627899,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8903607368469239,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05582953467965126,
"signal/advantage_abs_mean": 0.7452172636985779,
"signal/advantage_pre_scale_abs_mean": 0.07781935185194015,
"signal/advantage_pre_scale_std": 0.13558341413736344,
"signal/advantage_std": 0.9828800678253173,
"signal/brier_reward/centered_abs_mean": 0.10683204084634781,
"signal/brier_reward/group_std_mean": 0.14029796719551085,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17178708612918853,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010683204606175422,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023753628134727478,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04016609191894531,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03838530480861664,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023753628134727477,
"signal/format_reward/centered_abs_mean": 0.011631944589316845,
"signal/format_reward/group_std_mean": 0.02555925101041794,
"signal/format_reward/group_zero_std_frac": 0.8805555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09416337609291077,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005815972294658423,
"signal/frontier_coverage_0/centered_abs_mean": 0.14774567186832427,
"signal/frontier_coverage_0/group_std_mean": 0.19174781441688538,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0339319072663784,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021127631422132253,
"signal/frontier_coverage_1/centered_abs_mean": 0.14774567186832427,
"signal/frontier_coverage_1/group_std_mean": 0.19174781441688538,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0339319072663784,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021127631422132253,
"signal/frontier_coverage_10/centered_abs_mean": 0.05867672711610794,
"signal/frontier_coverage_10/group_std_mean": 0.07409504801034927,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013528883457183838,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008390772040002048,
"signal/frontier_coverage_15/centered_abs_mean": 0.07549417465925216,
"signal/frontier_coverage_15/group_std_mean": 0.09374563097953796,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017411107011139394,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010795666836202144,
"signal/frontier_coverage_20/centered_abs_mean": 0.10412525534629821,
"signal/frontier_coverage_20/group_std_mean": 0.12972887754440307,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02397709749639034,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014889911515638232,
"signal/frontier_coverage_25/centered_abs_mean": 0.13956733644008637,
"signal/frontier_coverage_25/group_std_mean": 0.17463013529777527,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03210580088198185,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019958128686994314,
"signal/frontier_coverage_5/centered_abs_mean": 0.14707699418067932,
"signal/frontier_coverage_5/group_std_mean": 0.19090475738048554,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03377884775400162,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002103201043792069,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32507652044296265,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3902816414833069,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5230625331401825,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03250765353441239,
"step": 845
},
{
"calibration/aurc": 0.15014207230116053,
"calibration/batch_distribution_entropy": 0.9652176849919577,
"calibration/buffer_distribution_entropy": 0.9773341124664008,
"calibration/confidence_entropy": 0.45897563079350806,
"calibration/coverage@0%": 0.13444436090568168,
"calibration/coverage@1%": 0.1975359079030707,
"calibration/coverage@10%": 0.35414943679685046,
"calibration/coverage@15%": 0.5254633339826573,
"calibration/coverage@20%": 0.734143564401551,
"calibration/coverage@25%": 0.864620228038039,
"calibration/coverage@30%": 0.9110765706806283,
"calibration/coverage@5%": 0.23821666290742236,
"calibration/ece": 0.22260068186711415,
"calibration/mean_confidence": 0.5354948923959031,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006163194444444442,
"completions/max_length": 3740.4,
"completions/max_terminated_length": 3740.4,
"completions/mean_length": 1421.7507080078126,
"completions/mean_terminated_length": 1430.629345703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 429.6,
"epoch": 2.04319946000675,
"grad_norm": 0.0027419920079410076,
"learning_rate": 1.141826923076923e-06,
"loss": -0.015,
"num_tokens": 2408581834.0,
"reward": 1.0171156525611877,
"reward_std": 0.12138309180736542,
"rewards/accuracy_reward": 0.7256944417953491,
"rewards/brier_reward": 0.817634391784668,
"rewards/confidence_uniqueness_reward": 0.9451643347740173,
"rewards/format_reward": 0.9938368082046509,
"rewards/frontier_coverage_0": 0.008699403330683707,
"rewards/frontier_coverage_1": 0.008699403330683707,
"rewards/frontier_coverage_10": 0.049815284460783,
"rewards/frontier_coverage_15": 0.10400655418634415,
"rewards/frontier_coverage_20": 0.17297520637512206,
"rewards/frontier_coverage_25": 0.2560495167970657,
"rewards/frontier_coverage_5": 0.008948711678385735,
"rewards/frontier_entropy_batch_reward": -0.2764132499694824,
"signal/accuracy_reward/centered_abs_mean": 0.1402560740709305,
"signal/accuracy_reward/group_std_mean": 0.18668433427810668,
"signal/accuracy_reward/group_zero_std_frac": 0.4666666805744171,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0340290307998656,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07012803703546525,
"signal/advantage_abs_mean": 0.7511513710021973,
"signal/advantage_pre_scale_abs_mean": 0.08810898214578629,
"signal/advantage_pre_scale_std": 0.14673225283622743,
"signal/advantage_std": 0.9830058932304382,
"signal/brier_reward/centered_abs_mean": 0.1129622220993042,
"signal/brier_reward/group_std_mean": 0.1479853630065918,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16706807613372804,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011296222917735577,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023049880191683768,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0411870576441288,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03394843973219395,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023049880284816025,
"signal/format_reward/centered_abs_mean": 0.011420355923473836,
"signal/format_reward/group_std_mean": 0.027312709763646126,
"signal/format_reward/group_zero_std_frac": 0.8638888835906983,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08352030664682389,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005710177961736918,
"signal/frontier_coverage_0/centered_abs_mean": 0.1656607449054718,
"signal/frontier_coverage_0/group_std_mean": 0.2115688681602478,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035052116960287094,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023689485620707273,
"signal/frontier_coverage_1/centered_abs_mean": 0.1656607449054718,
"signal/frontier_coverage_1/group_std_mean": 0.2115688681602478,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035052116960287094,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023689485620707273,
"signal/frontier_coverage_10/centered_abs_mean": 0.06097555160522461,
"signal/frontier_coverage_10/group_std_mean": 0.07610448449850082,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012921417132019997,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008719503879547119,
"signal/frontier_coverage_15/centered_abs_mean": 0.07464765012264252,
"signal/frontier_coverage_15/group_std_mean": 0.09246674776077271,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01581343188881874,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010674613760784269,
"signal/frontier_coverage_20/centered_abs_mean": 0.10256357938051223,
"signal/frontier_coverage_20/group_std_mean": 0.12776158303022384,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02171347513794899,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001466659177094698,
"signal/frontier_coverage_25/centered_abs_mean": 0.1391077905893326,
"signal/frontier_coverage_25/group_std_mean": 0.17436096370220183,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029439039155840875,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019892414566129446,
"signal/frontier_coverage_5/centered_abs_mean": 0.1648542582988739,
"signal/frontier_coverage_5/group_std_mean": 0.21056585609912873,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.034882017970085145,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002357415994629264,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31900513768196104,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3865148961544037,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.47265112996101377,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031900516524910924,
"step": 850
},
{
"epoch": 2.04319946000675,
"eval_calibration/aurc": 0.10331722642995035,
"eval_calibration/batch_distribution_entropy": 0.8994859346965384,
"eval_calibration/buffer_distribution_entropy": 0.9778323392141767,
"eval_calibration/confidence_entropy": 0.48347415095907587,
"eval_calibration/coverage@0%": 0.33602729885057475,
"eval_calibration/coverage@1%": 0.33602729885057475,
"eval_calibration/coverage@10%": 0.5599856321839081,
"eval_calibration/coverage@15%": 0.7857399425287356,
"eval_calibration/coverage@20%": 0.8602729885057471,
"eval_calibration/coverage@25%": 0.9301364942528735,
"eval_calibration/coverage@30%": 0.9780890804597702,
"eval_calibration/coverage@5%": 0.3776939655172414,
"eval_calibration/ece": 0.2081085308908046,
"eval_calibration/mean_confidence": 0.6086531788793104,
"eval_completions/clipped_ratio": 0.00434027777777779,
"eval_completions/max_length": 3287.5,
"eval_completions/max_terminated_length": 3287.5,
"eval_completions/mean_length": 1345.1163330078125,
"eval_completions/mean_terminated_length": 1350.9969482421875,
"eval_completions/min_length": 255.16666666666666,
"eval_completions/min_terminated_length": 472.5,
"eval_loss": 0.0,
"eval_num_tokens": 2408581834.0,
"eval_reward": 0.9270426034927368,
"eval_reward_std": 0.23688022047281265,
"eval_rewards/accuracy_reward": 0.6944444477558136,
"eval_rewards/brier_reward": 0.8301023046175638,
"eval_rewards/confidence_uniqueness_reward": 0.8897854586442312,
"eval_rewards/format_reward": 0.9947916766007742,
"eval_rewards/frontier_coverage_0": 0.03768664660553137,
"eval_rewards/frontier_coverage_1": 0.03768664660553137,
"eval_rewards/frontier_coverage_10": 0.052901595210035644,
"eval_rewards/frontier_coverage_15": 0.1036976898709933,
"eval_rewards/frontier_coverage_20": 0.17143141478300095,
"eval_rewards/frontier_coverage_25": 0.2521692191561063,
"eval_rewards/frontier_coverage_5": 0.037777805080016456,
"eval_rewards/frontier_entropy_batch_reward": -0.9947916766007742,
"eval_runtime": 212.5872,
"eval_samples_per_second": 4.704,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4058159738779068,
"eval_signal/accuracy_reward/group_std_mean": 0.454753835995992,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8737364013989767,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2029079869389534,
"eval_signal/advantage_abs_mean": 0.8658012747764587,
"eval_signal/advantage_pre_scale_abs_mean": 0.20703220119078955,
"eval_signal/advantage_pre_scale_std": 0.23602647334337234,
"eval_signal/advantage_std": 0.9863978525002798,
"eval_signal/brier_reward/centered_abs_mean": 0.163881945113341,
"eval_signal/brier_reward/group_std_mean": 0.22143561144669852,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07060187309980392,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01638819541161259,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04664057493209839,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06534135589996974,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020085140131413937,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00466405728366226,
"eval_signal/format_reward/centered_abs_mean": 0.009765624844779571,
"eval_signal/format_reward/group_std_mean": 0.022957629524171352,
"eval_signal/format_reward/group_zero_std_frac": 0.8888889153798422,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.020531928166747093,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.004882812422389786,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.24889005223910013,
"eval_signal/frontier_coverage_0/group_std_mean": 0.34743093450864154,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015377589967101812,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003559127605209748,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.24889005223910013,
"eval_signal/frontier_coverage_1/group_std_mean": 0.34743093450864154,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.015377589967101812,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003559127605209748,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.07524702822168668,
"eval_signal/frontier_coverage_10/group_std_mean": 0.0998810629049937,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004650625012194117,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001076032465789467,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.12017117316524188,
"eval_signal/frontier_coverage_15/group_std_mean": 0.1525182550152143,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007422773788372676,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017184477183036506,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.20055429637432098,
"eval_signal/frontier_coverage_20/group_std_mean": 0.24715026964743933,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012374301285793384,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028679263778030872,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.294106458624204,
"eval_signal/frontier_coverage_25/group_std_mean": 0.3577578862508138,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018138304352760315,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004205722206582625,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.24757558852434158,
"eval_signal/frontier_coverage_5/group_std_mean": 0.3458026399215062,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01529612842326363,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00354033091571182,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.009765624844779571,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.022957629524171352,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8888889153798422,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.004106385710959633,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0009765625,
"eval_steps_per_second": 0.028,
"step": 850
},
{
"epoch": 2.04319946000675,
"step": 850,
"train_probe_calibration/aurc": 0.12330988667450855,
"train_probe_calibration/batch_distribution_entropy": 0.903959760016461,
"train_probe_calibration/buffer_distribution_entropy": 0.9778106939337934,
"train_probe_calibration/confidence_entropy": 0.4707082194129657,
"train_probe_calibration/coverage@0%": 0.28461021505376344,
"train_probe_calibration/coverage@1%": 0.28461021505376344,
"train_probe_calibration/coverage@10%": 0.5561155913978495,
"train_probe_calibration/coverage@15%": 0.6920362903225806,
"train_probe_calibration/coverage@20%": 0.9163306451612904,
"train_probe_calibration/coverage@25%": 0.9791666666666666,
"train_probe_calibration/coverage@30%": 1.0,
"train_probe_calibration/coverage@5%": 0.3111559139784946,
"train_probe_calibration/ece": 0.2055883400537634,
"train_probe_calibration/mean_confidence": 0.6214233534946236,
"train_probe_completions/clipped_ratio": 0.00434027777777779,
"train_probe_completions/max_length": 3570.5,
"train_probe_completions/max_terminated_length": 3570.5,
"train_probe_completions/mean_length": 1370.541015625,
"train_probe_completions/mean_terminated_length": 1376.5172932942708,
"train_probe_completions/min_length": 171.33333333333334,
"train_probe_completions/min_terminated_length": 486.0,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 2408581834.0,
"train_probe_reward": 0.9569332003593445,
"train_probe_reward_std": 0.2246442437171936,
"train_probe_rewards/accuracy_reward": 0.7491319477558136,
"train_probe_rewards/brier_reward": 0.84639111161232,
"train_probe_rewards/confidence_uniqueness_reward": 0.8930891950925192,
"train_probe_rewards/format_reward": 0.9947916766007742,
"train_probe_rewards/frontier_coverage_0": 0.02118841770182674,
"train_probe_rewards/frontier_coverage_1": 0.02118841770182674,
"train_probe_rewards/frontier_coverage_10": 0.05621129460632801,
"train_probe_rewards/frontier_coverage_15": 0.11968943352500598,
"train_probe_rewards/frontier_coverage_20": 0.1998051976164182,
"train_probe_rewards/frontier_coverage_25": 0.2949647903442383,
"train_probe_rewards/frontier_coverage_5": 0.02139244688441977,
"train_probe_rewards/frontier_entropy_batch_reward": -0.9947916766007742,
"train_probe_runtime": 211.7592,
"train_probe_samples_per_second": 4.722,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.3678927918275197,
"train_probe_signal/accuracy_reward/group_std_mean": 0.43377458055814105,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8337254126866659,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18394639591375986,
"train_probe_signal/advantage_abs_mean": 0.8216431637605032,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.18601106852293015,
"train_probe_signal/advantage_pre_scale_std": 0.22373904784520468,
"train_probe_signal/advantage_std": 0.9863761961460114,
"train_probe_signal/brier_reward/centered_abs_mean": 0.14684191594521204,
"train_probe_signal/brier_reward/group_std_mean": 0.20539081345001856,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06664901288847129,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.014684191749741634,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04518438751498858,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.06575708587964375,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020526379346847534,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045184389455243945,
"train_probe_signal/format_reward/centered_abs_mean": 0.010091145522892475,
"train_probe_signal/format_reward/group_std_mean": 0.02946278266608715,
"train_probe_signal/format_reward/group_zero_std_frac": 0.8333333631356558,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.022930872005720932,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0050455727614462376,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.23663152754306793,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.34504841764767963,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015345245134085417,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0033838309658070407,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.23663152754306793,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.34504841764767963,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.015345245134085417,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033838309658070407,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.07336462040742238,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.0970181276400884,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004758586253349979,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00104911407106556,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.1168044979373614,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.14624296625455221,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007575858850032091,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001670304317182551,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.19161372631788254,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.2350246881445249,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012425205515076717,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027400763938203454,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.2768913333614667,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.33837322890758514,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01795490738004446,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003959546098485589,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.23545273641745249,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.34352175891399384,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01526872410128514,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033669740660116076,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.010091145522892475,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.02946278266608715,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8333333631356558,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.004586174463232358,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0010091145910943549,
"train_probe_steps_per_second": 0.028
},
{
"calibration/aurc": 0.19908929956399884,
"calibration/batch_distribution_entropy": 0.921269447034111,
"calibration/buffer_distribution_entropy": 0.9780586621449909,
"calibration/confidence_entropy": 0.4840395335947276,
"calibration/coverage@0%": 0.005749607669798455,
"calibration/coverage@1%": 0.005749607669798455,
"calibration/coverage@10%": 0.34102231999068,
"calibration/coverage@15%": 0.6641502710333531,
"calibration/coverage@20%": 0.7498762866868071,
"calibration/coverage@25%": 0.7665864694544311,
"calibration/coverage@30%": 0.802620535042992,
"calibration/coverage@5%": 0.11384360244786633,
"calibration/ece": 0.16123747199751368,
"calibration/mean_confidence": 0.6117934088268526,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0043402777777777676,
"completions/max_length": 3814.6,
"completions/max_terminated_length": 3814.6,
"completions/mean_length": 1304.929931640625,
"completions/mean_terminated_length": 1310.61953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 327.4,
"epoch": 2.055199310008625,
"grad_norm": 0.0032066632993519306,
"learning_rate": 1.1117788461538462e-06,
"loss": -0.0148,
"num_tokens": 2426706307.0,
"reward": 1.0196407675743102,
"reward_std": 0.10803952366113663,
"rewards/accuracy_reward": 0.7427083373069763,
"rewards/brier_reward": 0.817469346523285,
"rewards/confidence_uniqueness_reward": 0.9436403512954712,
"rewards/format_reward": 0.9956597208976745,
"rewards/frontier_coverage_0": -0.006897637248039245,
"rewards/frontier_coverage_1": -0.006897637248039245,
"rewards/frontier_coverage_10": 0.046662700921297075,
"rewards/frontier_coverage_15": 0.10604156851768494,
"rewards/frontier_coverage_20": 0.17919767796993255,
"rewards/frontier_coverage_25": 0.26617750227451326,
"rewards/frontier_coverage_5": -0.006526473723351955,
"rewards/frontier_entropy_batch_reward": -0.3391614556312561,
"signal/accuracy_reward/centered_abs_mean": 0.11490885317325591,
"signal/accuracy_reward/group_std_mean": 0.1600183442234993,
"signal/accuracy_reward/group_zero_std_frac": 0.5111111223697662,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.880127203464508,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05745442658662796,
"signal/advantage_abs_mean": 0.7468799233436585,
"signal/advantage_pre_scale_abs_mean": 0.07798066586256028,
"signal/advantage_pre_scale_std": 0.13145326524972917,
"signal/advantage_std": 0.9829369902610778,
"signal/brier_reward/centered_abs_mean": 0.11005659252405167,
"signal/brier_reward/group_std_mean": 0.14418595135211945,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1698257029056549,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01100565940141678,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021314630657434462,
"signal/confidence_uniqueness_reward/group_std_mean": 0.035215172171592715,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03296075724065304,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021314630983397366,
"signal/format_reward/centered_abs_mean": 0.00807291679084301,
"signal/format_reward/group_std_mean": 0.01913252491503954,
"signal/format_reward/group_zero_std_frac": 0.9055555701255799,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06147683933377266,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004036458395421505,
"signal/frontier_coverage_0/centered_abs_mean": 0.14297114163637162,
"signal/frontier_coverage_0/group_std_mean": 0.1866467148065567,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03151693716645241,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002044487278908491,
"signal/frontier_coverage_1/centered_abs_mean": 0.14297114163637162,
"signal/frontier_coverage_1/group_std_mean": 0.1866467148065567,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03151693716645241,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002044487278908491,
"signal/frontier_coverage_10/centered_abs_mean": 0.059609665721654895,
"signal/frontier_coverage_10/group_std_mean": 0.07471666783094406,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013211605697870254,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000852418236900121,
"signal/frontier_coverage_15/centered_abs_mean": 0.08127216696739196,
"signal/frontier_coverage_15/group_std_mean": 0.10074764937162399,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018062039092183114,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001162191992625594,
"signal/frontier_coverage_20/centered_abs_mean": 0.11325040906667709,
"signal/frontier_coverage_20/group_std_mean": 0.1412164866924286,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025165878981351853,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016194808762520553,
"signal/frontier_coverage_25/centered_abs_mean": 0.15194992423057557,
"signal/frontier_coverage_25/group_std_mean": 0.190808442234993,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03373695760965347,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002172883879393339,
"signal/frontier_coverage_5/centered_abs_mean": 0.14223289340734482,
"signal/frontier_coverage_5/group_std_mean": 0.18570739328861235,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.031353960186243056,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020339304348453878,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34247671365737914,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40856011509895324,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5324842154979705,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03424767255783081,
"step": 855
},
{
"calibration/aurc": 0.10892732221259172,
"calibration/batch_distribution_entropy": 0.942645479055105,
"calibration/buffer_distribution_entropy": 0.9767172763640286,
"calibration/confidence_entropy": 0.4767007364941224,
"calibration/coverage@0%": 0.042236584444139066,
"calibration/coverage@1%": 0.0833824177774724,
"calibration/coverage@10%": 0.471758608744446,
"calibration/coverage@15%": 0.8324888776281435,
"calibration/coverage@20%": 0.8920896317163667,
"calibration/coverage@25%": 0.9370618329439788,
"calibration/coverage@30%": 0.9763157894736842,
"calibration/coverage@5%": 0.3179804377948788,
"calibration/ece": 0.16581658387299025,
"calibration/mean_confidence": 0.6103028231313843,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004079861111111116,
"completions/max_length": 4006.8,
"completions/max_terminated_length": 4006.8,
"completions/mean_length": 1369.608349609375,
"completions/mean_terminated_length": 1375.258837890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 353.2,
"epoch": 2.0671991600105,
"grad_norm": 0.002877203281968832,
"learning_rate": 1.0817307692307693e-06,
"loss": -0.0101,
"num_tokens": 2445551011.0,
"reward": 1.0163455367088319,
"reward_std": 0.10174518972635269,
"rewards/accuracy_reward": 0.7261284708976745,
"rewards/brier_reward": 0.8230874896049499,
"rewards/confidence_uniqueness_reward": 0.945272159576416,
"rewards/format_reward": 0.9959201574325561,
"rewards/frontier_coverage_0": 0.011111350171267987,
"rewards/frontier_coverage_1": 0.011111350171267987,
"rewards/frontier_coverage_10": 0.048263268917798995,
"rewards/frontier_coverage_15": 0.10801017582416535,
"rewards/frontier_coverage_20": 0.1811635434627533,
"rewards/frontier_coverage_25": 0.2692377507686615,
"rewards/frontier_coverage_5": 0.01138177290558815,
"rewards/frontier_entropy_batch_reward": -0.3067070960998535,
"signal/accuracy_reward/centered_abs_mean": 0.10654839426279068,
"signal/accuracy_reward/group_std_mean": 0.15034933537244796,
"signal/accuracy_reward/group_zero_std_frac": 0.5333333432674408,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8724170207977295,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05327419713139534,
"signal/advantage_abs_mean": 0.7521509408950806,
"signal/advantage_pre_scale_abs_mean": 0.07447454035282135,
"signal/advantage_pre_scale_std": 0.1254849314689636,
"signal/advantage_std": 0.9828340649604798,
"signal/brier_reward/centered_abs_mean": 0.10740028470754623,
"signal/brier_reward/group_std_mean": 0.13960683047771455,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1774017930030823,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010740028135478497,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01953975111246109,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030278518795967102,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03224896155297756,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019539752043783664,
"signal/format_reward/centered_abs_mean": 0.006157769076526165,
"signal/format_reward/group_std_mean": 0.01401168517768383,
"signal/format_reward/group_zero_std_frac": 0.9305555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0502224363386631,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0030788845382630826,
"signal/frontier_coverage_0/centered_abs_mean": 0.1405676171183586,
"signal/frontier_coverage_0/group_std_mean": 0.18494743406772612,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03321279361844063,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020101168891415,
"signal/frontier_coverage_1/centered_abs_mean": 0.1405676171183586,
"signal/frontier_coverage_1/group_std_mean": 0.18494743406772612,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03321279361844063,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020101168891415,
"signal/frontier_coverage_10/centered_abs_mean": 0.06137363091111183,
"signal/frontier_coverage_10/group_std_mean": 0.07704001814126968,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014510192163288593,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008776429109275341,
"signal/frontier_coverage_15/centered_abs_mean": 0.0798090323805809,
"signal/frontier_coverage_15/group_std_mean": 0.09855622947216033,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018882869556546212,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001141269225627184,
"signal/frontier_coverage_20/centered_abs_mean": 0.1102443590760231,
"signal/frontier_coverage_20/group_std_mean": 0.1372637167572975,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02606889493763447,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015764942625537514,
"signal/frontier_coverage_25/centered_abs_mean": 0.14848661720752715,
"signal/frontier_coverage_25/group_std_mean": 0.18640778362751007,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03508574143052101,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021233585197478534,
"signal/frontier_coverage_5/centered_abs_mean": 0.1398726522922516,
"signal/frontier_coverage_5/group_std_mean": 0.1840555638074875,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.033049411699175836,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020001789554953573,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32406482100486755,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3888309359550476,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5364414274692535,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03240648210048676,
"step": 860
},
{
"calibration/aurc": 0.11880237943016916,
"calibration/batch_distribution_entropy": 0.9515152013772351,
"calibration/buffer_distribution_entropy": 0.9756482987390385,
"calibration/confidence_entropy": 0.4727274902334776,
"calibration/coverage@0%": 0.12918026544821584,
"calibration/coverage@1%": 0.1520969321148825,
"calibration/coverage@10%": 0.5746899477806788,
"calibration/coverage@15%": 0.6884165578764143,
"calibration/coverage@20%": 0.7405474869451697,
"calibration/coverage@25%": 0.8483953437771976,
"calibration/coverage@30%": 0.9348917536988687,
"calibration/coverage@5%": 0.37449276544821586,
"calibration/ece": 0.1899077945890448,
"calibration/mean_confidence": 0.6083666128032529,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0028645833333333483,
"completions/max_length": 3851.4,
"completions/max_terminated_length": 3851.4,
"completions/mean_length": 1410.0517333984376,
"completions/mean_terminated_length": 1414.1339599609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 460.8,
"epoch": 2.079199010012375,
"grad_norm": 0.0028341130819171667,
"learning_rate": 1.0516826923076925e-06,
"loss": -0.0089,
"num_tokens": 2464906903.0,
"reward": 1.033591103553772,
"reward_std": 0.09442763477563858,
"rewards/accuracy_reward": 0.7585069417953492,
"rewards/brier_reward": 0.8452976226806641,
"rewards/confidence_uniqueness_reward": 0.9443268656730652,
"rewards/format_reward": 0.9971354365348816,
"rewards/frontier_coverage_0": 0.008574995025992394,
"rewards/frontier_coverage_1": 0.008574995025992394,
"rewards/frontier_coverage_10": 0.05472532734274864,
"rewards/frontier_coverage_15": 0.12366417050361633,
"rewards/frontier_coverage_20": 0.20866862833499908,
"rewards/frontier_coverage_25": 0.3106157422065735,
"rewards/frontier_coverage_5": 0.008856690488755703,
"rewards/frontier_entropy_batch_reward": -0.335411924123764,
"signal/accuracy_reward/centered_abs_mean": 0.09484591782093048,
"signal/accuracy_reward/group_std_mean": 0.1376100465655327,
"signal/accuracy_reward/group_zero_std_frac": 0.5555555760860443,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8194542527198792,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04742295891046524,
"signal/advantage_abs_mean": 0.7460556268692017,
"signal/advantage_pre_scale_abs_mean": 0.06816202774643898,
"signal/advantage_pre_scale_std": 0.11762821078300476,
"signal/advantage_std": 0.9827412247657776,
"signal/brier_reward/centered_abs_mean": 0.09877004623413085,
"signal/brier_reward/group_std_mean": 0.13211368918418884,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17169649600982667,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009877004846930503,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.018939389660954476,
"signal/confidence_uniqueness_reward/group_std_mean": 0.028860129415988922,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03311100825667381,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001893938984721899,
"signal/format_reward/centered_abs_mean": 0.005116102378815412,
"signal/format_reward/group_std_mean": 0.011858247593045235,
"signal/format_reward/group_zero_std_frac": 0.9416666626930237,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.04352925010025501,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.002558051189407706,
"signal/frontier_coverage_0/centered_abs_mean": 0.13048950880765914,
"signal/frontier_coverage_0/group_std_mean": 0.17408806085586548,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03235846050083637,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018659999826923014,
"signal/frontier_coverage_1/centered_abs_mean": 0.13048950880765914,
"signal/frontier_coverage_1/group_std_mean": 0.17408806085586548,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03235846050083637,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018659999826923014,
"signal/frontier_coverage_10/centered_abs_mean": 0.0592160664498806,
"signal/frontier_coverage_10/group_std_mean": 0.0748526081442833,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014810064993798732,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008467897423543036,
"signal/frontier_coverage_15/centered_abs_mean": 0.08147549778223037,
"signal/frontier_coverage_15/group_std_mean": 0.1004263550043106,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.020447418093681335,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001165099604986608,
"signal/frontier_coverage_20/centered_abs_mean": 0.11275746524333954,
"signal/frontier_coverage_20/group_std_mean": 0.14012570679187775,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.028291113302111627,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001612431718967855,
"signal/frontier_coverage_25/centered_abs_mean": 0.1510842740535736,
"signal/frontier_coverage_25/group_std_mean": 0.18920941650867462,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0378778375685215,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021605050656944512,
"signal/frontier_coverage_5/centered_abs_mean": 0.1297021821141243,
"signal/frontier_coverage_5/group_std_mean": 0.17307354807853698,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032162808999419215,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018547413172200322,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32503774762153625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38833544254302976,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.570234090089798,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032503775879740716,
"step": 865
},
{
"calibration/aurc": 0.10472881819001853,
"calibration/batch_distribution_entropy": 0.9079796069299875,
"calibration/buffer_distribution_entropy": 0.9751814829986106,
"calibration/confidence_entropy": 0.49213627310384805,
"calibration/coverage@0%": 0.11510416666666667,
"calibration/coverage@1%": 0.14114583333333333,
"calibration/coverage@10%": 0.5958333333333334,
"calibration/coverage@15%": 0.8401041666666667,
"calibration/coverage@20%": 0.89375,
"calibration/coverage@25%": 0.9239583333333334,
"calibration/coverage@30%": 0.953125,
"calibration/coverage@5%": 0.34843750000000007,
"calibration/ece": 0.14281713541666663,
"calibration/mean_confidence": 0.6558258854166668,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003211805555555558,
"completions/max_length": 4017.2,
"completions/max_terminated_length": 4017.2,
"completions/mean_length": 1390.4245849609374,
"completions/mean_terminated_length": 1394.8662841796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 417.4,
"epoch": 2.09119886001425,
"grad_norm": 0.002695485483855009,
"learning_rate": 1.0216346153846154e-06,
"loss": -0.0103,
"num_tokens": 2483978098.0,
"reward": 1.0271677494049072,
"reward_std": 0.09729326367378235,
"rewards/accuracy_reward": 0.7520833253860474,
"rewards/brier_reward": 0.8424885869026184,
"rewards/confidence_uniqueness_reward": 0.9420551657676697,
"rewards/format_reward": 0.9967881917953492,
"rewards/frontier_coverage_0": 0.010245061293244363,
"rewards/frontier_coverage_1": 0.010245061293244363,
"rewards/frontier_coverage_10": 0.049554044008255006,
"rewards/frontier_coverage_15": 0.11662421822547912,
"rewards/frontier_coverage_20": 0.19967409074306489,
"rewards/frontier_coverage_25": 0.3006702125072479,
"rewards/frontier_coverage_5": 0.010480654053390026,
"rewards/frontier_entropy_batch_reward": -0.35696548223495483,
"signal/accuracy_reward/centered_abs_mean": 0.10629340261220932,
"signal/accuracy_reward/group_std_mean": 0.14241406172513962,
"signal/accuracy_reward/group_zero_std_frac": 0.5861111283302307,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9110217213630676,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05314670130610466,
"signal/advantage_abs_mean": 0.7621482253074646,
"signal/advantage_pre_scale_abs_mean": 0.07348197922110558,
"signal/advantage_pre_scale_std": 0.12210540175437927,
"signal/advantage_std": 0.9827365040779114,
"signal/brier_reward/centered_abs_mean": 0.1055911734700203,
"signal/brier_reward/group_std_mean": 0.13631585836410523,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18323537707328796,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01055911760777235,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02010197788476944,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02892959825694561,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.035044122114777566,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002010197751224041,
"signal/format_reward/centered_abs_mean": 0.005463324673473835,
"signal/format_reward/group_std_mean": 0.010764547064900399,
"signal/format_reward/group_zero_std_frac": 0.9527777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.04613239541649818,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0027316623367369176,
"signal/frontier_coverage_0/centered_abs_mean": 0.13712626695632935,
"signal/frontier_coverage_0/group_std_mean": 0.17697598934173583,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03414354957640171,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019609056878834965,
"signal/frontier_coverage_1/centered_abs_mean": 0.13712626695632935,
"signal/frontier_coverage_1/group_std_mean": 0.17697598934173583,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03414354957640171,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019609056878834965,
"signal/frontier_coverage_10/centered_abs_mean": 0.058862689137458804,
"signal/frontier_coverage_10/group_std_mean": 0.07372922748327256,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014756158180534839,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008417364791966975,
"signal/frontier_coverage_15/centered_abs_mean": 0.08503047823905945,
"signal/frontier_coverage_15/group_std_mean": 0.1047013595700264,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02136564515531063,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012159358710050582,
"signal/frontier_coverage_20/centered_abs_mean": 0.12002795040607453,
"signal/frontier_coverage_20/group_std_mean": 0.14857746958732604,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030121758580207825,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001716399728320539,
"signal/frontier_coverage_25/centered_abs_mean": 0.1620142638683319,
"signal/frontier_coverage_25/group_std_mean": 0.2014380544424057,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04057658687233925,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002316803904250264,
"signal/frontier_coverage_5/centered_abs_mean": 0.13623161017894744,
"signal/frontier_coverage_5/group_std_mean": 0.17587542831897734,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03392289765179157,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019481121795251965,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3304478108882904,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3938392698764801,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.579087895154953,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03304478265345097,
"step": 870
},
{
"calibration/aurc": 0.14662707262516228,
"calibration/batch_distribution_entropy": 0.9622453671594036,
"calibration/buffer_distribution_entropy": 0.9745117786630301,
"calibration/confidence_entropy": 0.4938231503236358,
"calibration/coverage@0%": 0.10117493472584856,
"calibration/coverage@1%": 0.14234116623150567,
"calibration/coverage@10%": 0.4209747606614448,
"calibration/coverage@15%": 0.5815246953872932,
"calibration/coverage@20%": 0.6905080504786771,
"calibration/coverage@25%": 0.757797541340296,
"calibration/coverage@30%": 0.9203125,
"calibration/coverage@5%": 0.2972734442993908,
"calibration/ece": 0.1510147796997389,
"calibration/mean_confidence": 0.5970026253807659,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.002083333333333326,
"completions/max_length": 3938.4,
"completions/max_terminated_length": 3938.4,
"completions/mean_length": 1414.036474609375,
"completions/mean_terminated_length": 1416.9802734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 449.4,
"epoch": 2.103198710016125,
"grad_norm": 0.002670974237844348,
"learning_rate": 9.915865384615386e-07,
"loss": -0.0005,
"num_tokens": 2503373014.0,
"reward": 1.0179281949996948,
"reward_std": 0.0986163780093193,
"rewards/accuracy_reward": 0.7246527791023254,
"rewards/brier_reward": 0.8295908212661743,
"rewards/confidence_uniqueness_reward": 0.9478270053863526,
"rewards/format_reward": 0.9979166626930237,
"rewards/frontier_coverage_0": 0.01796838641166687,
"rewards/frontier_coverage_1": 0.01796838641166687,
"rewards/frontier_coverage_10": 0.05329090356826782,
"rewards/frontier_coverage_15": 0.10762108713388444,
"rewards/frontier_coverage_20": 0.1816571831703186,
"rewards/frontier_coverage_25": 0.2727284550666809,
"rewards/frontier_coverage_5": 0.018276363145560026,
"rewards/frontier_entropy_batch_reward": -0.3067232221364975,
"signal/accuracy_reward/centered_abs_mean": 0.11227213442325593,
"signal/accuracy_reward/group_std_mean": 0.1481925517320633,
"signal/accuracy_reward/group_zero_std_frac": 0.575,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9142273187637329,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05613606721162796,
"signal/advantage_abs_mean": 0.7798005938529968,
"signal/advantage_pre_scale_abs_mean": 0.07608538419008255,
"signal/advantage_pre_scale_std": 0.12115374058485032,
"signal/advantage_std": 0.9828497409820557,
"signal/brier_reward/centered_abs_mean": 0.10694562196731568,
"signal/brier_reward/group_std_mean": 0.13694375157356262,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17493544220924379,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010694562830030918,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01673703547567129,
"signal/confidence_uniqueness_reward/group_std_mean": 0.023892204836010934,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027389925345778464,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001673703594133258,
"signal/format_reward/centered_abs_mean": 0.0036566840135492384,
"signal/format_reward/group_std_mean": 0.007749038189649582,
"signal/format_reward/group_zero_std_frac": 0.9638888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.029558508843183517,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0018283420067746192,
"signal/frontier_coverage_0/centered_abs_mean": 0.1515151709318161,
"signal/frontier_coverage_0/group_std_mean": 0.19341229498386384,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03542120829224586,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00216666697524488,
"signal/frontier_coverage_1/centered_abs_mean": 0.1515151709318161,
"signal/frontier_coverage_1/group_std_mean": 0.19341229498386384,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03542120829224586,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00216666697524488,
"signal/frontier_coverage_10/centered_abs_mean": 0.060925094038248064,
"signal/frontier_coverage_10/group_std_mean": 0.07617206424474716,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014272183738648891,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000871228810865432,
"signal/frontier_coverage_15/centered_abs_mean": 0.07883169800043106,
"signal/frontier_coverage_15/group_std_mean": 0.09689974635839463,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018499715998768807,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011272932635620237,
"signal/frontier_coverage_20/centered_abs_mean": 0.10982066988945008,
"signal/frontier_coverage_20/group_std_mean": 0.13585858047008514,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025753576681017874,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001570435566827655,
"signal/frontier_coverage_25/centered_abs_mean": 0.14952315390110016,
"signal/frontier_coverage_25/group_std_mean": 0.18586367070674897,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03504568859934807,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021381810773164036,
"signal/frontier_coverage_5/centered_abs_mean": 0.15073439180850984,
"signal/frontier_coverage_5/group_std_mean": 0.19244405031204223,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03523862287402153,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021555018145591022,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33520500659942626,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3998372495174408,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5495693683624268,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03352050185203552,
"step": 875
},
{
"calibration/aurc": 0.1376710203770225,
"calibration/batch_distribution_entropy": 0.9580564845872235,
"calibration/buffer_distribution_entropy": 0.9737883093034526,
"calibration/confidence_entropy": 0.48958195789736864,
"calibration/coverage@0%": 0.11123125504080489,
"calibration/coverage@1%": 0.20157068062827227,
"calibration/coverage@10%": 0.3906456331250939,
"calibration/coverage@15%": 0.7124519841975039,
"calibration/coverage@20%": 0.8295517613768404,
"calibration/coverage@25%": 0.8598868125709129,
"calibration/coverage@30%": 0.8834183150383442,
"calibration/coverage@5%": 0.33841810998865396,
"calibration/ece": 0.18009319174333246,
"calibration/mean_confidence": 0.5862276137465312,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.001649305555555558,
"completions/max_length": 3926.8,
"completions/max_terminated_length": 3926.8,
"completions/mean_length": 1418.9532958984375,
"completions/mean_terminated_length": 1421.2898193359374,
"completions/min_length": 0.0,
"completions/min_terminated_length": 428.4,
"epoch": 2.115198560018,
"grad_norm": 0.002700896468013525,
"learning_rate": 9.615384615384617e-07,
"loss": -0.0022,
"num_tokens": 2522803900.0,
"reward": 1.020362961292267,
"reward_std": 0.09932073801755906,
"rewards/accuracy_reward": 0.7322916746139526,
"rewards/brier_reward": 0.8255838513374328,
"rewards/confidence_uniqueness_reward": 0.9477863907814026,
"rewards/format_reward": 0.9983506917953491,
"rewards/frontier_coverage_0": 0.008382726181298494,
"rewards/frontier_coverage_1": 0.008382726181298494,
"rewards/frontier_coverage_10": 0.045684900134801865,
"rewards/frontier_coverage_15": 0.10314829349517822,
"rewards/frontier_coverage_20": 0.17775794565677644,
"rewards/frontier_coverage_25": 0.2699141949415207,
"rewards/frontier_coverage_5": 0.008700376003980636,
"rewards/frontier_entropy_batch_reward": -0.3118946313858032,
"signal/accuracy_reward/centered_abs_mean": 0.11066623479127884,
"signal/accuracy_reward/group_std_mean": 0.15176202952861786,
"signal/accuracy_reward/group_zero_std_frac": 0.5472222328186035,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9180627942085267,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05533311739563942,
"signal/advantage_abs_mean": 0.7546151518821717,
"signal/advantage_pre_scale_abs_mean": 0.07361921072006225,
"signal/advantage_pre_scale_std": 0.1216355249285698,
"signal/advantage_std": 0.9828206658363342,
"signal/brier_reward/centered_abs_mean": 0.11196579039096832,
"signal/brier_reward/group_std_mean": 0.14342193007469178,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18711092174053193,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011196578480303287,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01612330451607704,
"signal/confidence_uniqueness_reward/group_std_mean": 0.024166127666831017,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02694421596825123,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016123305074870586,
"signal/format_reward/centered_abs_mean": 0.003152126749046147,
"signal/format_reward/group_std_mean": 0.008134117349982262,
"signal/format_reward/group_zero_std_frac": 0.9583333253860473,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.026590963266789912,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0015760633745230735,
"signal/frontier_coverage_0/centered_abs_mean": 0.15225654542446138,
"signal/frontier_coverage_0/group_std_mean": 0.1960272341966629,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03628757819533348,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021772684995085,
"signal/frontier_coverage_1/centered_abs_mean": 0.15225654542446138,
"signal/frontier_coverage_1/group_std_mean": 0.1960272341966629,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03628757819533348,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021772684995085,
"signal/frontier_coverage_10/centered_abs_mean": 0.06339073628187179,
"signal/frontier_coverage_10/group_std_mean": 0.08014297634363174,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015141036547720432,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009064875310286879,
"signal/frontier_coverage_15/centered_abs_mean": 0.08139910399913788,
"signal/frontier_coverage_15/group_std_mean": 0.10091938078403473,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.019420773163437842,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011640072101727129,
"signal/frontier_coverage_20/centered_abs_mean": 0.11484313309192658,
"signal/frontier_coverage_20/group_std_mean": 0.14308151602745056,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027388099953532218,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016422567423433065,
"signal/frontier_coverage_25/centered_abs_mean": 0.15665629208087922,
"signal/frontier_coverage_25/group_std_mean": 0.19622641503810884,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0373531699180603,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022401849273592234,
"signal/frontier_coverage_5/centered_abs_mean": 0.15140585005283355,
"signal/frontier_coverage_5/group_std_mean": 0.1949646919965744,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03608435168862343,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002165103517472744,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3276274800300598,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39510163068771365,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5458629727363586,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03276274874806404,
"step": 880
},
{
"calibration/aurc": 0.07528091745904578,
"calibration/batch_distribution_entropy": 0.9633578410189617,
"calibration/buffer_distribution_entropy": 0.9736194749383638,
"calibration/confidence_entropy": 0.4910086738322564,
"calibration/coverage@0%": 0.12088364882506528,
"calibration/coverage@1%": 0.33416149912967796,
"calibration/coverage@10%": 0.6406005221932115,
"calibration/coverage@15%": 0.822749401653612,
"calibration/coverage@20%": 0.9265217036553525,
"calibration/coverage@25%": 0.9723931135770234,
"calibration/coverage@30%": 0.9895833333333334,
"calibration/coverage@5%": 0.5827417863359444,
"calibration/ece": 0.23126799257506522,
"calibration/mean_confidence": 0.5690869960019582,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00277777777777779,
"completions/max_length": 3843.4,
"completions/max_terminated_length": 3843.4,
"completions/mean_length": 1369.9625732421875,
"completions/mean_terminated_length": 1373.880810546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 398.8,
"epoch": 2.127198410019875,
"grad_norm": 0.0029529957100749016,
"learning_rate": 9.314903846153847e-07,
"loss": -0.0044,
"num_tokens": 2541659533.0,
"reward": 1.0487470626831055,
"reward_std": 0.08783675283193589,
"rewards/accuracy_reward": 0.7880208373069764,
"rewards/brier_reward": 0.8365139484405517,
"rewards/confidence_uniqueness_reward": 0.9465442538261414,
"rewards/format_reward": 0.9972222089767456,
"rewards/frontier_coverage_0": -0.01605305355042219,
"rewards/frontier_coverage_1": -0.01605305355042219,
"rewards/frontier_coverage_10": 0.044044318795204165,
"rewards/frontier_coverage_15": 0.11718032211065292,
"rewards/frontier_coverage_20": 0.20520202815532684,
"rewards/frontier_coverage_25": 0.3133831262588501,
"rewards/frontier_coverage_5": -0.015344736352562904,
"rewards/frontier_entropy_batch_reward": -0.31223025918006897,
"signal/accuracy_reward/centered_abs_mean": 0.09235026091337203,
"signal/accuracy_reward/group_std_mean": 0.12444257289171219,
"signal/accuracy_reward/group_zero_std_frac": 0.6305555701255798,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8251009345054626,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04617513045668602,
"signal/advantage_abs_mean": 0.7711875319480896,
"signal/advantage_pre_scale_abs_mean": 0.06669707149267197,
"signal/advantage_pre_scale_std": 0.10958891659975052,
"signal/advantage_std": 0.9827027201652527,
"signal/brier_reward/centered_abs_mean": 0.10570832341909409,
"signal/brier_reward/group_std_mean": 0.1348507136106491,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18915933072566987,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010570832900702954,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.016615297459065915,
"signal/confidence_uniqueness_reward/group_std_mean": 0.023423294350504876,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029620739817619323,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016615298809483647,
"signal/format_reward/centered_abs_mean": 0.0035047742538154127,
"signal/format_reward/group_std_mean": 0.007171806693077087,
"signal/format_reward/group_zero_std_frac": 0.9666666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.030849797092378138,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0017523871269077063,
"signal/frontier_coverage_0/centered_abs_mean": 0.14871225953102113,
"signal/frontier_coverage_0/group_std_mean": 0.18890169262886047,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03809470310807228,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021265852730721234,
"signal/frontier_coverage_1/centered_abs_mean": 0.14871225953102113,
"signal/frontier_coverage_1/group_std_mean": 0.18890169262886047,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03809470310807228,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021265852730721234,
"signal/frontier_coverage_10/centered_abs_mean": 0.061393161118030545,
"signal/frontier_coverage_10/group_std_mean": 0.07690578997135163,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01572293322533369,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008779221563600004,
"signal/frontier_coverage_15/centered_abs_mean": 0.07879534959793091,
"signal/frontier_coverage_15/group_std_mean": 0.09692421555519104,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.020150484517216682,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011267734691500664,
"signal/frontier_coverage_20/centered_abs_mean": 0.10975634306669235,
"signal/frontier_coverage_20/group_std_mean": 0.13552465736865998,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02805260457098484,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001569515699520707,
"signal/frontier_coverage_25/centered_abs_mean": 0.14789953231811523,
"signal/frontier_coverage_25/group_std_mean": 0.1838620573282242,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03780416175723076,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002114963345229626,
"signal/frontier_coverage_5/centered_abs_mean": 0.1476011872291565,
"signal/frontier_coverage_5/group_std_mean": 0.1875333845615387,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03781076371669769,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002110696933232248,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32497783899307253,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3905618965625763,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5809773206710815,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03249778635799885,
"step": 885
},
{
"calibration/aurc": 0.04501724498201738,
"calibration/batch_distribution_entropy": 0.9603090893588673,
"calibration/buffer_distribution_entropy": 0.9745517804075691,
"calibration/confidence_entropy": 0.4651638780880843,
"calibration/coverage@0%": 0.2614841710182768,
"calibration/coverage@1%": 0.5651300043516102,
"calibration/coverage@10%": 0.8623463337684942,
"calibration/coverage@15%": 0.9040361183637946,
"calibration/coverage@20%": 0.9347979221061792,
"calibration/coverage@25%": 0.9624374456048738,
"calibration/coverage@30%": 0.9916449086161879,
"calibration/coverage@5%": 0.7617901436031331,
"calibration/ece": 0.2291900868690165,
"calibration/mean_confidence": 0.5784430661444735,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.001822916666666674,
"completions/max_length": 3955.2,
"completions/max_terminated_length": 3955.2,
"completions/mean_length": 1485.9614990234375,
"completions/mean_terminated_length": 1488.7108154296875,
"completions/min_length": 90.8,
"completions/min_terminated_length": 420.0,
"epoch": 2.13919826002175,
"grad_norm": 0.0026017827913165092,
"learning_rate": 9.014423076923078e-07,
"loss": 0.001,
"num_tokens": 2561890065.0,
"reward": 1.0419267177581788,
"reward_std": 0.0928407445549965,
"rewards/accuracy_reward": 0.7695312619209289,
"rewards/brier_reward": 0.835858428478241,
"rewards/confidence_uniqueness_reward": 0.9476023077964782,
"rewards/format_reward": 0.9981770753860474,
"rewards/frontier_coverage_0": 0.0017650447785854339,
"rewards/frontier_coverage_1": 0.0017650447785854339,
"rewards/frontier_coverage_10": 0.05538591891527176,
"rewards/frontier_coverage_15": 0.12445316910743713,
"rewards/frontier_coverage_20": 0.21158009469509126,
"rewards/frontier_coverage_25": 0.3179014027118683,
"rewards/frontier_coverage_5": 0.0021275728940963745,
"rewards/frontier_entropy_batch_reward": -0.3049774348735809,
"signal/accuracy_reward/centered_abs_mean": 0.10240342766046524,
"signal/accuracy_reward/group_std_mean": 0.1418137162923813,
"signal/accuracy_reward/group_zero_std_frac": 0.569444453716278,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.913576877117157,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05120171383023262,
"signal/advantage_abs_mean": 0.7645326375961303,
"signal/advantage_pre_scale_abs_mean": 0.06943797469139099,
"signal/advantage_pre_scale_std": 0.11579804867506027,
"signal/advantage_std": 0.9827001333236695,
"signal/brier_reward/centered_abs_mean": 0.10340909659862518,
"signal/brier_reward/group_std_mean": 0.13372913300991057,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18566021621227263,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010340910032391548,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.017535041272640228,
"signal/confidence_uniqueness_reward/group_std_mean": 0.024436182528734206,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0314997099339962,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017535041086375713,
"signal/format_reward/centered_abs_mean": 0.003293185739312321,
"signal/format_reward/group_std_mean": 0.006819825246930122,
"signal/format_reward/group_zero_std_frac": 0.9694444417953492,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.02991781122982502,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0016465928696561606,
"signal/frontier_coverage_0/centered_abs_mean": 0.14981991052627563,
"signal/frontier_coverage_0/group_std_mean": 0.19636679589748382,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03842330724000931,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002142424648627639,
"signal/frontier_coverage_1/centered_abs_mean": 0.14981991052627563,
"signal/frontier_coverage_1/group_std_mean": 0.19636679589748382,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03842330724000931,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002142424648627639,
"signal/frontier_coverage_10/centered_abs_mean": 0.06453083753585816,
"signal/frontier_coverage_10/group_std_mean": 0.080882328748703,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01652641948312521,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009227909496985376,
"signal/frontier_coverage_15/centered_abs_mean": 0.08124902099370956,
"signal/frontier_coverage_15/group_std_mean": 0.09994722455739975,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.020808987319469452,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011618610005825758,
"signal/frontier_coverage_20/centered_abs_mean": 0.11021952629089356,
"signal/frontier_coverage_20/group_std_mean": 0.13671945929527282,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.028233184292912484,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001576139172539115,
"signal/frontier_coverage_25/centered_abs_mean": 0.1474437177181244,
"signal/frontier_coverage_25/group_std_mean": 0.18446506559848785,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03775979653000831,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021084450650960206,
"signal/frontier_coverage_5/centered_abs_mean": 0.14860370755195618,
"signal/frontier_coverage_5/group_std_mean": 0.19485906660556793,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03811139240860939,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021250330843031406,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33459954857826235,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3981178283691406,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5996464252471924,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03345995619893074,
"step": 890
},
{
"calibration/aurc": 0.1280523015671718,
"calibration/batch_distribution_entropy": 0.9510907337880019,
"calibration/buffer_distribution_entropy": 0.974771002805943,
"calibration/confidence_entropy": 0.48342940663851514,
"calibration/coverage@0%": 0.049491405570060924,
"calibration/coverage@1%": 0.049491405570060924,
"calibration/coverage@10%": 0.38817721932114885,
"calibration/coverage@15%": 0.6707218233246302,
"calibration/coverage@20%": 0.8403707027850305,
"calibration/coverage@25%": 0.9280134899912967,
"calibration/coverage@30%": 0.9958333333333333,
"calibration/coverage@5%": 0.2808447563098347,
"calibration/ece": 0.18869512305537425,
"calibration/mean_confidence": 0.5883434234796562,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0021701388888889063,
"completions/max_length": 3923.2,
"completions/max_terminated_length": 3923.2,
"completions/mean_length": 1463.28515625,
"completions/mean_terminated_length": 1466.5015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 335.2,
"epoch": 2.151198110023625,
"grad_norm": 0.0026420399080961943,
"learning_rate": 8.713942307692308e-07,
"loss": -0.0033,
"num_tokens": 2581828038.0,
"reward": 1.026082420349121,
"reward_std": 0.09336404502391815,
"rewards/accuracy_reward": 0.7427951574325562,
"rewards/brier_reward": 0.8260280966758728,
"rewards/confidence_uniqueness_reward": 0.9466118693351746,
"rewards/format_reward": 0.9978298664093017,
"rewards/frontier_coverage_0": 0.0059367487207055095,
"rewards/frontier_coverage_1": 0.0059367487207055095,
"rewards/frontier_coverage_10": 0.05136653557419777,
"rewards/frontier_coverage_15": 0.11881858706474305,
"rewards/frontier_coverage_20": 0.20078957080841064,
"rewards/frontier_coverage_25": 0.2984639883041382,
"rewards/frontier_coverage_5": 0.006314245797693729,
"rewards/frontier_entropy_batch_reward": -0.31327160596847536,
"signal/accuracy_reward/centered_abs_mean": 0.10116644948720932,
"signal/accuracy_reward/group_std_mean": 0.14085038453340532,
"signal/accuracy_reward/group_zero_std_frac": 0.5722222208976746,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8828286409378052,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05058322474360466,
"signal/advantage_abs_mean": 0.7592063546180725,
"signal/advantage_pre_scale_abs_mean": 0.06951456665992736,
"signal/advantage_pre_scale_std": 0.11602886617183686,
"signal/advantage_std": 0.982738447189331,
"signal/brier_reward/centered_abs_mean": 0.1086281344294548,
"signal/brier_reward/group_std_mean": 0.14015234708786012,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18977911174297332,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010862813144922257,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01798480302095413,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02583237551152706,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.031481166183948514,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017984803766012193,
"signal/format_reward/centered_abs_mean": 0.003965928812976927,
"signal/format_reward/group_std_mean": 0.008585151471197604,
"signal/format_reward/group_zero_std_frac": 0.9611111044883728,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.034098946023732424,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0019829644064884634,
"signal/frontier_coverage_0/centered_abs_mean": 0.15116261839866638,
"signal/frontier_coverage_0/group_std_mean": 0.1969261050224304,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.037714557349681856,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021616254933178427,
"signal/frontier_coverage_1/centered_abs_mean": 0.15116261839866638,
"signal/frontier_coverage_1/group_std_mean": 0.1969261050224304,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.037714557349681856,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021616254933178427,
"signal/frontier_coverage_10/centered_abs_mean": 0.064113200455904,
"signal/frontier_coverage_10/group_std_mean": 0.07983765006065369,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01604436244815588,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009168186923488974,
"signal/frontier_coverage_15/centered_abs_mean": 0.0844537153840065,
"signal/frontier_coverage_15/group_std_mean": 0.10395829975605012,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.021203552559018134,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001207688101567328,
"signal/frontier_coverage_20/centered_abs_mean": 0.1167292207479477,
"signal/frontier_coverage_20/group_std_mean": 0.1449556201696396,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.029318232834339143,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016692279605194926,
"signal/frontier_coverage_25/centered_abs_mean": 0.15577602088451387,
"signal/frontier_coverage_25/group_std_mean": 0.1945989966392517,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.039113936573266984,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002227597124874592,
"signal/frontier_coverage_5/centered_abs_mean": 0.15000051259994507,
"signal/frontier_coverage_5/group_std_mean": 0.19544619619846343,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03742243126034737,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002145007345825434,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32618371248245237,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3917438447475433,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5712901592254639,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03261837288737297,
"step": 895
},
{
"calibration/aurc": 0.17971495000616086,
"calibration/batch_distribution_entropy": 0.9387109815779631,
"calibration/buffer_distribution_entropy": 0.975423395766124,
"calibration/confidence_entropy": 0.47164422241919307,
"calibration/coverage@0%": 0.016154013961605585,
"calibration/coverage@1%": 0.016154013961605585,
"calibration/coverage@10%": 0.33542484729493893,
"calibration/coverage@15%": 0.39011234729493893,
"calibration/coverage@20%": 0.6381653577661431,
"calibration/coverage@25%": 0.8498009380453752,
"calibration/coverage@30%": 0.9270015270506107,
"calibration/coverage@5%": 0.016154013961605585,
"calibration/ece": 0.17659054720222508,
"calibration/mean_confidence": 0.5827529517615619,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0028645833333333258,
"completions/max_length": 4001.0,
"completions/max_terminated_length": 4001.0,
"completions/mean_length": 1518.630029296875,
"completions/mean_terminated_length": 1523.0183837890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 468.6,
"epoch": 2.1631979600255,
"grad_norm": 0.0024768190924078226,
"learning_rate": 8.41346153846154e-07,
"loss": -0.0054,
"num_tokens": 2602411456.0,
"reward": 1.0215399146080018,
"reward_std": 0.09830788671970367,
"rewards/accuracy_reward": 0.7322048544883728,
"rewards/brier_reward": 0.8238555550575256,
"rewards/confidence_uniqueness_reward": 0.9468532681465149,
"rewards/format_reward": 0.9971354246139527,
"rewards/frontier_coverage_0": 0.011091101169586181,
"rewards/frontier_coverage_1": 0.011091101169586181,
"rewards/frontier_coverage_10": 0.05177242755889892,
"rewards/frontier_coverage_15": 0.11416967511177063,
"rewards/frontier_coverage_20": 0.1915341079235077,
"rewards/frontier_coverage_25": 0.2842023193836212,
"rewards/frontier_coverage_5": 0.011335279606282712,
"rewards/frontier_entropy_batch_reward": -0.2985638976097107,
"signal/accuracy_reward/centered_abs_mean": 0.10755750834941864,
"signal/accuracy_reward/group_std_mean": 0.1495337277650833,
"signal/accuracy_reward/group_zero_std_frac": 0.547222226858139,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8794362545013428,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05377875417470932,
"signal/advantage_abs_mean": 0.7575773119926452,
"signal/advantage_pre_scale_abs_mean": 0.07303174883127213,
"signal/advantage_pre_scale_std": 0.12083332985639572,
"signal/advantage_std": 0.9828310012817383,
"signal/brier_reward/centered_abs_mean": 0.11027123332023621,
"signal/brier_reward/group_std_mean": 0.14200958609580994,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18250376284122466,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011027123592793942,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.018133307434618474,
"signal/confidence_uniqueness_reward/group_std_mean": 0.026324766129255293,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03011079877614975,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018133309204131365,
"signal/format_reward/centered_abs_mean": 0.004638671898283065,
"signal/format_reward/group_std_mean": 0.00965967532247305,
"signal/format_reward/group_zero_std_frac": 0.9555555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.03860214501619339,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0023193359491415324,
"signal/frontier_coverage_0/centered_abs_mean": 0.15348501801490783,
"signal/frontier_coverage_0/group_std_mean": 0.20067269504070281,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036319942027330396,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002194835641421378,
"signal/frontier_coverage_1/centered_abs_mean": 0.15348501801490783,
"signal/frontier_coverage_1/group_std_mean": 0.20067269504070281,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036319942027330396,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002194835641421378,
"signal/frontier_coverage_10/centered_abs_mean": 0.06233002617955208,
"signal/frontier_coverage_10/group_std_mean": 0.078104929625988,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014825647883117198,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000891319359652698,
"signal/frontier_coverage_15/centered_abs_mean": 0.08362076282501221,
"signal/frontier_coverage_15/group_std_mean": 0.1029461145401001,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01982920467853546,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011957768816500901,
"signal/frontier_coverage_20/centered_abs_mean": 0.11619037836790085,
"signal/frontier_coverage_20/group_std_mean": 0.1436010032892227,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027482646331191064,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001661522383801639,
"signal/frontier_coverage_25/centered_abs_mean": 0.15632675886154174,
"signal/frontier_coverage_25/group_std_mean": 0.1943357616662979,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03692755475640297,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022354727145284413,
"signal/frontier_coverage_5/centered_abs_mean": 0.1523052781820297,
"signal/frontier_coverage_5/group_std_mean": 0.19916028082370757,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.036041303724050525,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021779653849080204,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3256483495235443,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39229129552841185,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5407821238040924,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03256483599543571,
"step": 900
},
{
"epoch": 2.1631979600255,
"eval_calibration/aurc": 0.12737566735071082,
"eval_calibration/batch_distribution_entropy": 0.9176665914052275,
"eval_calibration/buffer_distribution_entropy": 0.9754314783079789,
"eval_calibration/confidence_entropy": 0.4755197109213362,
"eval_calibration/coverage@0%": 0.32577284946236557,
"eval_calibration/coverage@1%": 0.32577284946236557,
"eval_calibration/coverage@10%": 0.46169354838709675,
"eval_calibration/coverage@15%": 0.5354502688172044,
"eval_calibration/coverage@20%": 0.8802083333333334,
"eval_calibration/coverage@25%": 0.9427083333333334,
"eval_calibration/coverage@30%": 1.0,
"eval_calibration/coverage@5%": 0.34139784946236557,
"eval_calibration/ece": 0.2249175739247312,
"eval_calibration/mean_confidence": 0.5708137432795698,
"eval_completions/clipped_ratio": 0.0017361111111111234,
"eval_completions/max_length": 3592.1666666666665,
"eval_completions/max_terminated_length": 3592.1666666666665,
"eval_completions/mean_length": 1524.2620035807292,
"eval_completions/mean_terminated_length": 1526.8823852539062,
"eval_completions/min_length": 392.8333333333333,
"eval_completions/min_terminated_length": 567.8333333333334,
"eval_loss": 0.0,
"eval_num_tokens": 2602411456.0,
"eval_reward": 0.9374911387761434,
"eval_reward_std": 0.22820752362410227,
"eval_rewards/accuracy_reward": 0.7118055522441864,
"eval_rewards/brier_reward": 0.8265486260255178,
"eval_rewards/confidence_uniqueness_reward": 0.8966138859589895,
"eval_rewards/format_reward": 0.9973958432674408,
"eval_rewards/frontier_coverage_0": 0.02534069788331787,
"eval_rewards/frontier_coverage_1": 0.02534069788331787,
"eval_rewards/frontier_coverage_10": 0.05668467034896215,
"eval_rewards/frontier_coverage_15": 0.11529384429256122,
"eval_rewards/frontier_coverage_20": 0.1911569188038508,
"eval_rewards/frontier_coverage_25": 0.2818978354334831,
"eval_rewards/frontier_coverage_5": 0.02552862201506893,
"eval_rewards/frontier_entropy_batch_reward": -0.9973958432674408,
"eval_runtime": 207.3445,
"eval_samples_per_second": 4.823,
"eval_signal/accuracy_reward/centered_abs_mean": 0.3916015625,
"eval_signal/accuracy_reward/group_std_mean": 0.4472481807072957,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.867256224155426,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.19580078125,
"eval_signal/advantage_abs_mean": 0.8500007092952728,
"eval_signal/advantage_pre_scale_abs_mean": 0.19565576066573462,
"eval_signal/advantage_pre_scale_std": 0.22633356104294458,
"eval_signal/advantage_std": 0.9863857130209605,
"eval_signal/brier_reward/centered_abs_mean": 0.16644690930843353,
"eval_signal/brier_reward/group_std_mean": 0.22046558558940887,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07374625280499458,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.016644690961887438,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.042304361859957375,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05535396312673887,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01876258881141742,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004230436325694124,
"eval_signal/format_reward/centered_abs_mean": 0.0050455727614462376,
"eval_signal/format_reward/group_std_mean": 0.014731391333043575,
"eval_signal/format_reward/group_zero_std_frac": 0.9166666865348816,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.010851632803678513,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0025227863807231188,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.2662147382895152,
"eval_signal/frontier_coverage_0/group_std_mean": 0.3709094375371933,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.016928008447090786,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003806870896369219,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.2662147382895152,
"eval_signal/frontier_coverage_1/group_std_mean": 0.3709094375371933,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.016928008447090786,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003806870896369219,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.08367854605118434,
"eval_signal/frontier_coverage_10/group_std_mean": 0.11023158207535744,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005312102691580852,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011966032131264608,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.12918043757478395,
"eval_signal/frontier_coverage_15/group_std_mean": 0.16170358409484228,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00819395606716474,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018472802476026118,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.21566026906172434,
"eval_signal/frontier_coverage_20/group_std_mean": 0.2624283855160077,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.013669513786832491,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030839417595416307,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.31325619916121167,
"eval_signal/frontier_coverage_25/group_std_mean": 0.3792371451854706,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.019848248300453026,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004479563406979044,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2640439545114835,
"eval_signal/frontier_coverage_5/group_std_mean": 0.368171289563179,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01678965923686822,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00377582855677853,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0050455727614462376,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.014731391333043575,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9166666865348816,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0021703265762577453,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.000504557314949731,
"eval_steps_per_second": 0.029,
"step": 900
},
{
"epoch": 2.1631979600255,
"step": 900,
"train_probe_calibration/aurc": 0.10339707316454276,
"train_probe_calibration/batch_distribution_entropy": 0.9102197374475085,
"train_probe_calibration/buffer_distribution_entropy": 0.9751232554999216,
"train_probe_calibration/confidence_entropy": 0.4694546862543447,
"train_probe_calibration/coverage@0%": 0.35668682795698925,
"train_probe_calibration/coverage@1%": 0.35668682795698925,
"train_probe_calibration/coverage@10%": 0.5608198924731183,
"train_probe_calibration/coverage@15%": 0.7224462365591399,
"train_probe_calibration/coverage@20%": 0.8996975806451614,
"train_probe_calibration/coverage@25%": 0.9519489247311829,
"train_probe_calibration/coverage@30%": 1.0,
"train_probe_calibration/coverage@5%": 0.43565188172043007,
"train_probe_calibration/ece": 0.23572354166666673,
"train_probe_calibration/mean_confidence": 0.6060012567204301,
"train_probe_completions/clipped_ratio": 0.002604166666666685,
"train_probe_completions/max_length": 3242.0,
"train_probe_completions/max_terminated_length": 3242.0,
"train_probe_completions/mean_length": 1525.1625366210938,
"train_probe_completions/mean_terminated_length": 1529.1767578125,
"train_probe_completions/min_length": 219.16666666666666,
"train_probe_completions/min_terminated_length": 446.0,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 2602411456.0,
"train_probe_reward": 0.9594915211200714,
"train_probe_reward_std": 0.21700799961884817,
"train_probe_rewards/accuracy_reward": 0.7560763955116272,
"train_probe_rewards/brier_reward": 0.8315047522385915,
"train_probe_rewards/confidence_uniqueness_reward": 0.8951753179232279,
"train_probe_rewards/format_reward": 0.9973958432674408,
"train_probe_rewards/frontier_coverage_0": 0.003199717883641521,
"train_probe_rewards/frontier_coverage_1": 0.003199717883641521,
"train_probe_rewards/frontier_coverage_10": 0.0520126453290383,
"train_probe_rewards/frontier_coverage_15": 0.11875824133555095,
"train_probe_rewards/frontier_coverage_20": 0.20274977634350458,
"train_probe_rewards/frontier_coverage_25": 0.303492138783137,
"train_probe_rewards/frontier_coverage_5": 0.003787370825496813,
"train_probe_rewards/frontier_entropy_batch_reward": -0.9973958432674408,
"train_probe_runtime": 195.4243,
"train_probe_samples_per_second": 5.117,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.3585611979166667,
"train_probe_signal/accuracy_reward/group_std_mean": 0.4281422396500905,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8391776780287424,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.17928059895833334,
"train_probe_signal/advantage_abs_mean": 0.8138086001078287,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.1782543882727623,
"train_probe_signal/advantage_pre_scale_std": 0.2158868486682574,
"train_probe_signal/advantage_std": 0.9863618016242981,
"train_probe_signal/brier_reward/centered_abs_mean": 0.16066461553176245,
"train_probe_signal/brier_reward/group_std_mean": 0.22043094535668692,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07519176974892616,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.016066461335867643,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04383396108945211,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.057254182174801826,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02052529404560725,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004383396008051932,
"train_probe_signal/format_reward/centered_abs_mean": 0.0050455727614462376,
"train_probe_signal/format_reward/group_std_mean": 0.014731391333043575,
"train_probe_signal/format_reward/group_zero_std_frac": 0.9166666865348816,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011859034498532614,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0025227863807231188,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.251843864719073,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.37152427931626636,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01686274539679289,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036013672749201455,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.251843864719073,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.37152427931626636,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01686274539679289,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036013672749201455,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.08020331958929698,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.10853784407178561,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005366942146793008,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011469074330913525,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.12478353704015414,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.1562705859541893,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.008351171389222145,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017844046621272962,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.20546038200457892,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.25083090364933014,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.013755069114267826,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029380834894254804,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.29606155057748157,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.3606761296590169,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01982360954085986,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004233680199831724,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.2498022640744845,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.3688337703545888,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.016726200158397358,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003572172368876636,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0050455727614462376,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.014731391333043575,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9166666865348816,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0023718070394049087,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0005045572761446238,
"train_probe_steps_per_second": 0.031
},
{
"calibration/aurc": 0.11625473761732023,
"calibration/batch_distribution_entropy": 0.9375525909506154,
"calibration/buffer_distribution_entropy": 0.9747469529937035,
"calibration/confidence_entropy": 0.5016573158140087,
"calibration/coverage@0%": 0.07990803583054076,
"calibration/coverage@1%": 0.14413780084359556,
"calibration/coverage@10%": 0.5565305096694833,
"calibration/coverage@15%": 0.6223219716048876,
"calibration/coverage@20%": 0.7940382941688425,
"calibration/coverage@25%": 0.8649776979982594,
"calibration/coverage@30%": 0.928077404264578,
"calibration/coverage@5%": 0.5094944015850825,
"calibration/ece": 0.18248357807338889,
"calibration/mean_confidence": 0.6210023286462509,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004600694444444442,
"completions/max_length": 4064.8,
"completions/max_terminated_length": 4064.8,
"completions/mean_length": 1550.9193603515625,
"completions/mean_terminated_length": 1558.0166259765624,
"completions/min_length": 0.0,
"completions/min_terminated_length": 433.2,
"epoch": 2.175197810027375,
"grad_norm": 0.0024279081262648106,
"learning_rate": 8.11298076923077e-07,
"loss": -0.0141,
"num_tokens": 2623369151.0,
"reward": 1.0176831007003784,
"reward_std": 0.10145644396543503,
"rewards/accuracy_reward": 0.7294270753860473,
"rewards/brier_reward": 0.828159236907959,
"rewards/confidence_uniqueness_reward": 0.9443735361099244,
"rewards/format_reward": 0.9953992962837219,
"rewards/frontier_coverage_0": 0.015256157889962197,
"rewards/frontier_coverage_1": 0.015256157889962197,
"rewards/frontier_coverage_10": 0.05224640518426895,
"rewards/frontier_coverage_15": 0.11739680767059327,
"rewards/frontier_coverage_20": 0.1974783331155777,
"rewards/frontier_coverage_25": 0.2923546195030212,
"rewards/frontier_coverage_5": 0.015459264814853668,
"rewards/frontier_entropy_batch_reward": -0.32071307897567747,
"signal/accuracy_reward/centered_abs_mean": 0.11327582597732544,
"signal/accuracy_reward/group_std_mean": 0.1501081556081772,
"signal/accuracy_reward/group_zero_std_frac": 0.5722222208976746,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9572094082832336,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05663791298866272,
"signal/advantage_abs_mean": 0.7761594295501709,
"signal/advantage_pre_scale_abs_mean": 0.07832107692956924,
"signal/advantage_pre_scale_std": 0.12819174826145172,
"signal/advantage_std": 0.9827957510948181,
"signal/brier_reward/centered_abs_mean": 0.1068603053689003,
"signal/brier_reward/group_std_mean": 0.1386454313993454,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18077919483184815,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01068603079766035,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020939309895038605,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0292234193533659,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03532315455377102,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020939309615641832,
"signal/format_reward/centered_abs_mean": 0.00721028633415699,
"signal/format_reward/group_std_mean": 0.012249564565718175,
"signal/format_reward/group_zero_std_frac": 0.9527777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06003017425537109,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.003605143167078495,
"signal/frontier_coverage_0/centered_abs_mean": 0.14778611361980437,
"signal/frontier_coverage_0/group_std_mean": 0.19523155093193054,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0357908271253109,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002113341446965933,
"signal/frontier_coverage_1/centered_abs_mean": 0.14778611361980437,
"signal/frontier_coverage_1/group_std_mean": 0.19523155093193054,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0357908271253109,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002113341446965933,
"signal/frontier_coverage_10/centered_abs_mean": 0.05996151715517044,
"signal/frontier_coverage_10/group_std_mean": 0.07582777291536331,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014531717076897622,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008574496838264168,
"signal/frontier_coverage_15/centered_abs_mean": 0.08238778412342071,
"signal/frontier_coverage_15/group_std_mean": 0.10163151174783706,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01998242549598217,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011781453620642423,
"signal/frontier_coverage_20/centered_abs_mean": 0.11567231565713883,
"signal/frontier_coverage_20/group_std_mean": 0.14291902482509614,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.028052419424057007,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001654114224947989,
"signal/frontier_coverage_25/centered_abs_mean": 0.15648081600666047,
"signal/frontier_coverage_25/group_std_mean": 0.19369642734527587,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.037944577634334564,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002237675618380308,
"signal/frontier_coverage_5/centered_abs_mean": 0.14658704698085784,
"signal/frontier_coverage_5/group_std_mean": 0.19370121657848358,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03550057634711266,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020961946807801723,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33480802178382874,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40186876654624937,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5686902463436126,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033480801805853846,
"step": 905
},
{
"calibration/aurc": 0.0762505990821146,
"calibration/batch_distribution_entropy": 0.9448920258160249,
"calibration/buffer_distribution_entropy": 0.9749763470687652,
"calibration/confidence_entropy": 0.4879323731747208,
"calibration/coverage@0%": 0.17987303493149745,
"calibration/coverage@1%": 0.23371247646727406,
"calibration/coverage@10%": 0.676308814637699,
"calibration/coverage@15%": 0.8841887565101484,
"calibration/coverage@20%": 0.9659603413447664,
"calibration/coverage@25%": 1.0,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.5516467755012299,
"calibration/ece": 0.23416966717296384,
"calibration/mean_confidence": 0.6035169900806124,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0038194444444444643,
"completions/max_length": 4006.2,
"completions/max_terminated_length": 4006.2,
"completions/mean_length": 1551.52646484375,
"completions/mean_terminated_length": 1557.5523193359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 500.4,
"epoch": 2.1871976600292498,
"grad_norm": 0.002537149004638195,
"learning_rate": 7.8125e-07,
"loss": -0.006,
"num_tokens": 2644336304.0,
"reward": 1.0250213384628295,
"reward_std": 0.09622730314731598,
"rewards/accuracy_reward": 0.7448784708976746,
"rewards/brier_reward": 0.8241114974021911,
"rewards/confidence_uniqueness_reward": 0.9452796816825867,
"rewards/format_reward": 0.9961805462837219,
"rewards/frontier_coverage_0": 0.003916370496153831,
"rewards/frontier_coverage_1": 0.003916370496153831,
"rewards/frontier_coverage_10": 0.05034622177481651,
"rewards/frontier_coverage_15": 0.11719416230916976,
"rewards/frontier_coverage_20": 0.19946504831314088,
"rewards/frontier_coverage_25": 0.2970153570175171,
"rewards/frontier_coverage_5": 0.004244742169976235,
"rewards/frontier_entropy_batch_reward": -0.32115537524223325,
"signal/accuracy_reward/centered_abs_mean": 0.10778537392616272,
"signal/accuracy_reward/group_std_mean": 0.14330336451530457,
"signal/accuracy_reward/group_zero_std_frac": 0.5888888835906982,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9045879483222962,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05389268696308136,
"signal/advantage_abs_mean": 0.7803467512130737,
"signal/advantage_pre_scale_abs_mean": 0.07378631830215454,
"signal/advantage_pre_scale_std": 0.12085007727146149,
"signal/advantage_std": 0.9827847957611084,
"signal/brier_reward/centered_abs_mean": 0.1151393249630928,
"signal/brier_reward/group_std_mean": 0.14573339223861695,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1954524338245392,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011513932794332504,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.018627097085118293,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02717142626643181,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03194965198636055,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018627098063006998,
"signal/format_reward/centered_abs_mean": 0.005013020779006183,
"signal/format_reward/group_std_mean": 0.010137713141739368,
"signal/format_reward/group_zero_std_frac": 0.955555546283722,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.043760602921247484,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0025065103895030917,
"signal/frontier_coverage_0/centered_abs_mean": 0.1600848525762558,
"signal/frontier_coverage_0/group_std_mean": 0.2041901171207428,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03885265812277794,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022892132867127655,
"signal/frontier_coverage_1/centered_abs_mean": 0.1600848525762558,
"signal/frontier_coverage_1/group_std_mean": 0.2041901171207428,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03885265812277794,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022892132867127655,
"signal/frontier_coverage_10/centered_abs_mean": 0.0670699842274189,
"signal/frontier_coverage_10/group_std_mean": 0.08348270207643509,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01628856398165226,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009591008070856333,
"signal/frontier_coverage_15/centered_abs_mean": 0.08440963327884674,
"signal/frontier_coverage_15/group_std_mean": 0.10362496972084045,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.020565735176205635,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012070577591657638,
"signal/frontier_coverage_20/centered_abs_mean": 0.1160733938217163,
"signal/frontier_coverage_20/group_std_mean": 0.143232861161232,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.028256673365831375,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001659849495626986,
"signal/frontier_coverage_25/centered_abs_mean": 0.15518866181373597,
"signal/frontier_coverage_25/group_std_mean": 0.1923240453004837,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.037742793560028076,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022191978525370358,
"signal/frontier_coverage_5/centered_abs_mean": 0.15902412235736846,
"signal/frontier_coverage_5/group_std_mean": 0.20288212597370148,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.038597740978002545,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002274044952355325,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3329595446586609,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39746089577674865,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5680349946022034,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033295954763889316,
"step": 910
},
{
"calibration/aurc": 0.08763255499847859,
"calibration/batch_distribution_entropy": 0.9593239357381013,
"calibration/buffer_distribution_entropy": 0.9749175365346063,
"calibration/confidence_entropy": 0.4850151497736473,
"calibration/coverage@0%": 0.1297758230030463,
"calibration/coverage@1%": 0.2983730826460291,
"calibration/coverage@10%": 0.6446673665017447,
"calibration/coverage@15%": 0.7576914950817645,
"calibration/coverage@20%": 0.8550639094863646,
"calibration/coverage@25%": 0.9362886378997463,
"calibration/coverage@30%": 0.9807291666666668,
"calibration/coverage@5%": 0.4771018238588258,
"calibration/ece": 0.16642530184501453,
"calibration/mean_confidence": 0.5903065416765859,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00512152777777779,
"completions/max_length": 4016.8,
"completions/max_terminated_length": 4016.8,
"completions/mean_length": 1555.1954833984375,
"completions/mean_terminated_length": 1563.224755859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 467.0,
"epoch": 2.1991975100311247,
"grad_norm": 0.002537642838433385,
"learning_rate": 7.512019230769231e-07,
"loss": -0.009,
"num_tokens": 2665345692.0,
"reward": 1.0272498369216918,
"reward_std": 0.10110396295785903,
"rewards/accuracy_reward": 0.7412326574325562,
"rewards/brier_reward": 0.8351900458335877,
"rewards/confidence_uniqueness_reward": 0.9449792385101319,
"rewards/format_reward": 0.9948784708976746,
"rewards/frontier_coverage_0": 0.011141146440058947,
"rewards/frontier_coverage_1": 0.011141146440058947,
"rewards/frontier_coverage_10": 0.059621766209602356,
"rewards/frontier_coverage_15": 0.12630040645599366,
"rewards/frontier_coverage_20": 0.21101914644241332,
"rewards/frontier_coverage_25": 0.30998865365982053,
"rewards/frontier_coverage_5": 0.011431800480931998,
"rewards/frontier_entropy_batch_reward": -0.29413830637931826,
"signal/accuracy_reward/centered_abs_mean": 0.11163737028837203,
"signal/accuracy_reward/group_std_mean": 0.14887254685163498,
"signal/accuracy_reward/group_zero_std_frac": 0.569444453716278,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9359636783599854,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05581868514418602,
"signal/advantage_abs_mean": 0.7702690005302429,
"signal/advantage_pre_scale_abs_mean": 0.07702369540929795,
"signal/advantage_pre_scale_std": 0.12798326462507248,
"signal/advantage_std": 0.9828004717826844,
"signal/brier_reward/centered_abs_mean": 0.10626696497201919,
"signal/brier_reward/group_std_mean": 0.1377051830291748,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17910066843032837,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010626696608960628,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021018927916884422,
"signal/confidence_uniqueness_reward/group_std_mean": 0.029378090426325797,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.035751673951745035,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021018928149715067,
"signal/format_reward/centered_abs_mean": 0.007807074673473835,
"signal/format_reward/group_std_mean": 0.013004416413605214,
"signal/format_reward/group_zero_std_frac": 0.9527777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06638518832623959,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0039035373367369173,
"signal/frontier_coverage_0/centered_abs_mean": 0.14786854684352874,
"signal/frontier_coverage_0/group_std_mean": 0.19159375727176667,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0357651524245739,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021145202219486235,
"signal/frontier_coverage_1/centered_abs_mean": 0.14786854684352874,
"signal/frontier_coverage_1/group_std_mean": 0.19159375727176667,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0357651524245739,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021145202219486235,
"signal/frontier_coverage_10/centered_abs_mean": 0.062376074492931366,
"signal/frontier_coverage_10/group_std_mean": 0.07864119410514832,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015165227092802524,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008919778745621442,
"signal/frontier_coverage_15/centered_abs_mean": 0.08264107257127762,
"signal/frontier_coverage_15/group_std_mean": 0.10196218788623809,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.019957508146762847,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011817673221230507,
"signal/frontier_coverage_20/centered_abs_mean": 0.11547355949878693,
"signal/frontier_coverage_20/group_std_mean": 0.14330800771713256,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027816576510667802,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016512719681486487,
"signal/frontier_coverage_25/centered_abs_mean": 0.15543023347854615,
"signal/frontier_coverage_25/group_std_mean": 0.1938639521598816,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.037404580414295195,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022226523142307997,
"signal/frontier_coverage_5/centered_abs_mean": 0.1467347264289856,
"signal/frontier_coverage_5/group_std_mean": 0.19018640220165253,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03549126200377941,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002098306594416499,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3218979060649872,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3870994865894318,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5446942985057831,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03218979090452194,
"step": 915
},
{
"calibration/aurc": 0.10810832702896182,
"calibration/batch_distribution_entropy": 0.9625593780795281,
"calibration/buffer_distribution_entropy": 0.9749842011690901,
"calibration/confidence_entropy": 0.4886371192108515,
"calibration/coverage@0%": 0.14716266762812186,
"calibration/coverage@1%": 0.188431210385539,
"calibration/coverage@10%": 0.6588592644298024,
"calibration/coverage@15%": 0.7659079756924984,
"calibration/coverage@20%": 0.8385027357046191,
"calibration/coverage@25%": 0.8986056621054501,
"calibration/coverage@30%": 0.9289297773160363,
"calibration/coverage@5%": 0.3055369041028165,
"calibration/ece": 0.15420951834596666,
"calibration/mean_confidence": 0.5739624371588201,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3969.8,
"completions/max_terminated_length": 3969.8,
"completions/mean_length": 1547.8402099609375,
"completions/mean_terminated_length": 1553.94873046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 391.4,
"epoch": 2.2111973600329997,
"grad_norm": 0.0025333850644528866,
"learning_rate": 7.211538461538461e-07,
"loss": -0.0053,
"num_tokens": 2686252779.0,
"reward": 1.0309239864349364,
"reward_std": 0.10125423967838287,
"rewards/accuracy_reward": 0.7498263835906982,
"rewards/brier_reward": 0.8307314157485962,
"rewards/confidence_uniqueness_reward": 0.9464572906494141,
"rewards/format_reward": 0.99609375,
"rewards/frontier_coverage_0": 0.004023569263517856,
"rewards/frontier_coverage_1": 0.004023569263517856,
"rewards/frontier_coverage_10": 0.0525899201631546,
"rewards/frontier_coverage_15": 0.12458169758319855,
"rewards/frontier_coverage_20": 0.2097533941268921,
"rewards/frontier_coverage_25": 0.30895119309425356,
"rewards/frontier_coverage_5": 0.004381868522614241,
"rewards/frontier_entropy_batch_reward": -0.29883754849433897,
"signal/accuracy_reward/centered_abs_mean": 0.1223415806889534,
"signal/accuracy_reward/group_std_mean": 0.1568053334951401,
"signal/accuracy_reward/group_zero_std_frac": 0.5722222447395324,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0544026374816895,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0611707903444767,
"signal/advantage_abs_mean": 0.7869542241096497,
"signal/advantage_pre_scale_abs_mean": 0.07895849943161011,
"signal/advantage_pre_scale_std": 0.12843640744686127,
"signal/advantage_std": 0.9827647566795349,
"signal/brier_reward/centered_abs_mean": 0.1082388699054718,
"signal/brier_reward/group_std_mean": 0.1389143019914627,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18672825992107392,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010823887214064598,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019709834456443788,
"signal/confidence_uniqueness_reward/group_std_mean": 0.028117352351546287,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03400571942329407,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019709833664819597,
"signal/format_reward/centered_abs_mean": 0.006776258768513799,
"signal/format_reward/group_std_mean": 0.012154985405504703,
"signal/format_reward/group_zero_std_frac": 0.9527777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05845015123486519,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0033881293842568994,
"signal/frontier_coverage_0/centered_abs_mean": 0.1616591066122055,
"signal/frontier_coverage_0/group_std_mean": 0.20822520554065704,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0398515485227108,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002311725215986371,
"signal/frontier_coverage_1/centered_abs_mean": 0.1616591066122055,
"signal/frontier_coverage_1/group_std_mean": 0.20822520554065704,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0398515485227108,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002311725215986371,
"signal/frontier_coverage_10/centered_abs_mean": 0.06490998640656472,
"signal/frontier_coverage_10/group_std_mean": 0.08136135190725327,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.016016687825322153,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009282128070481121,
"signal/frontier_coverage_15/centered_abs_mean": 0.08258948773145676,
"signal/frontier_coverage_15/group_std_mean": 0.1021083876490593,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0203911405056715,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001181029691360891,
"signal/frontier_coverage_20/centered_abs_mean": 0.1151643916964531,
"signal/frontier_coverage_20/group_std_mean": 0.1425256460905075,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.028438878804445268,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016468508169054986,
"signal/frontier_coverage_25/centered_abs_mean": 0.1551089197397232,
"signal/frontier_coverage_25/group_std_mean": 0.19257171154022218,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03830631747841835,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022180575411766767,
"signal/frontier_coverage_5/centered_abs_mean": 0.16062280237674714,
"signal/frontier_coverage_5/group_std_mean": 0.206931734085083,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03959641382098198,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002296905964612961,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3264326810836792,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39315393567085266,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5637098908424377,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032643269002437594,
"step": 920
},
{
"calibration/aurc": 0.0581684721366415,
"calibration/batch_distribution_entropy": 0.951823492836569,
"calibration/buffer_distribution_entropy": 0.9756385764474949,
"calibration/confidence_entropy": 0.4881648988825013,
"calibration/coverage@0%": 0.1203125,
"calibration/coverage@1%": 0.22239583333333335,
"calibration/coverage@10%": 0.8208333333333332,
"calibration/coverage@15%": 0.8916666666666666,
"calibration/coverage@20%": 0.9229166666666668,
"calibration/coverage@25%": 1.0,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.6442708333333333,
"calibration/ece": 0.22919932812500005,
"calibration/mean_confidence": 0.6105329635416666,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.002256944444444464,
"completions/max_length": 3890.8,
"completions/max_terminated_length": 3890.8,
"completions/mean_length": 1462.75087890625,
"completions/mean_terminated_length": 1466.093701171875,
"completions/min_length": 80.4,
"completions/min_terminated_length": 406.0,
"epoch": 2.2231972100348747,
"grad_norm": 0.002696170937269926,
"learning_rate": 6.911057692307694e-07,
"loss": -0.0031,
"num_tokens": 2706173173.0,
"reward": 1.0313870668411256,
"reward_std": 0.09161647409200668,
"rewards/accuracy_reward": 0.7465277910232544,
"rewards/brier_reward": 0.8327569127082824,
"rewards/confidence_uniqueness_reward": 0.948668384552002,
"rewards/format_reward": 0.9977430462837219,
"rewards/frontier_coverage_0": 0.005679074954241514,
"rewards/frontier_coverage_1": 0.005679074954241514,
"rewards/frontier_coverage_10": 0.05745949521660805,
"rewards/frontier_coverage_15": 0.12821974307298661,
"rewards/frontier_coverage_20": 0.21377132534980775,
"rewards/frontier_coverage_25": 0.3111018896102905,
"rewards/frontier_coverage_5": 0.00600547194480896,
"rewards/frontier_entropy_batch_reward": -0.29300045371055605,
"signal/accuracy_reward/centered_abs_mean": 0.10111762136220932,
"signal/accuracy_reward/group_std_mean": 0.13440315127372743,
"signal/accuracy_reward/group_zero_std_frac": 0.6166666626930237,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.913372540473938,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05055881068110466,
"signal/advantage_abs_mean": 0.776658010482788,
"signal/advantage_pre_scale_abs_mean": 0.07023707553744316,
"signal/advantage_pre_scale_std": 0.11647895723581314,
"signal/advantage_std": 0.9826768517494202,
"signal/brier_reward/centered_abs_mean": 0.1053426593542099,
"signal/brier_reward/group_std_mean": 0.13604272305965423,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19112329483032225,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010534266009926796,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.016748364828526974,
"signal/confidence_uniqueness_reward/group_std_mean": 0.022399993613362312,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030245038866996764,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016748364781960846,
"signal/format_reward/centered_abs_mean": 0.0037977430853061377,
"signal/format_reward/group_std_mean": 0.0062358868308365345,
"signal/format_reward/group_zero_std_frac": 0.9777777671813965,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.033247584290802476,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0018988715426530689,
"signal/frontier_coverage_0/centered_abs_mean": 0.14969644248485564,
"signal/frontier_coverage_0/group_std_mean": 0.19280532896518707,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03871278986334801,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002140659070573747,
"signal/frontier_coverage_1/centered_abs_mean": 0.14969644248485564,
"signal/frontier_coverage_1/group_std_mean": 0.19280532896518707,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03871278986334801,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002140659070573747,
"signal/frontier_coverage_10/centered_abs_mean": 0.06155589893460274,
"signal/frontier_coverage_10/group_std_mean": 0.07702510505914688,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01599162146449089,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008802493568509818,
"signal/frontier_coverage_15/centered_abs_mean": 0.0844751238822937,
"signal/frontier_coverage_15/group_std_mean": 0.1046410083770752,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.021979451179504395,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012079942971467972,
"signal/frontier_coverage_20/centered_abs_mean": 0.11722851842641831,
"signal/frontier_coverage_20/group_std_mean": 0.14596150517463685,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030499268695712088,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001676367805339396,
"signal/frontier_coverage_25/centered_abs_mean": 0.1558176100254059,
"signal/frontier_coverage_25/group_std_mean": 0.19490368366241456,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04053145200014115,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002228191774338484,
"signal/frontier_coverage_5/centered_abs_mean": 0.14871238470077514,
"signal/frontier_coverage_5/group_std_mean": 0.19156993329524993,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03845802396535873,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021265871357172726,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32605803608894346,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.391255658864975,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5923472046852112,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03260580375790596,
"step": 925
},
{
"calibration/aurc": 0.043213178725075574,
"calibration/batch_distribution_entropy": 0.9438239170625191,
"calibration/buffer_distribution_entropy": 0.9750450086297026,
"calibration/confidence_entropy": 0.4616332302786533,
"calibration/coverage@0%": 0.2288194444444444,
"calibration/coverage@1%": 0.4826471560846561,
"calibration/coverage@10%": 0.8594742063492063,
"calibration/coverage@15%": 0.9177166005291004,
"calibration/coverage@20%": 0.9734375,
"calibration/coverage@25%": 0.9859375,
"calibration/coverage@30%": 0.9989583333333332,
"calibration/coverage@5%": 0.759077380952381,
"calibration/ece": 0.22953177084160054,
"calibration/mean_confidence": 0.6075171478091932,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003038194444444442,
"completions/max_length": 3974.2,
"completions/max_terminated_length": 3974.2,
"completions/mean_length": 1501.35712890625,
"completions/mean_terminated_length": 1505.9070068359374,
"completions/min_length": 183.8,
"completions/min_terminated_length": 446.6,
"epoch": 2.2351970600367497,
"grad_norm": 0.002601813990622759,
"learning_rate": 6.610576923076924e-07,
"loss": -0.0069,
"num_tokens": 2726565767.0,
"reward": 1.0443670511245728,
"reward_std": 0.09180981665849686,
"rewards/accuracy_reward": 0.7729166626930237,
"rewards/brier_reward": 0.8460847616195679,
"rewards/confidence_uniqueness_reward": 0.9460752367973327,
"rewards/format_reward": 0.9969618082046509,
"rewards/frontier_coverage_0": 0.006405340367928147,
"rewards/frontier_coverage_1": 0.006405340367928147,
"rewards/frontier_coverage_10": 0.0647600881755352,
"rewards/frontier_coverage_15": 0.14153310060501098,
"rewards/frontier_coverage_20": 0.234138023853302,
"rewards/frontier_coverage_25": 0.33819299936294556,
"rewards/frontier_coverage_5": 0.006916235387325287,
"rewards/frontier_entropy_batch_reward": -0.31204586625099184,
"signal/accuracy_reward/centered_abs_mean": 0.10067274197936057,
"signal/accuracy_reward/group_std_mean": 0.1319912225008011,
"signal/accuracy_reward/group_zero_std_frac": 0.6277777910232544,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8978427886962891,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05033637098968029,
"signal/advantage_abs_mean": 0.7825265645980835,
"signal/advantage_pre_scale_abs_mean": 0.07099922001361847,
"signal/advantage_pre_scale_std": 0.11690742075443268,
"signal/advantage_std": 0.9826888680458069,
"signal/brier_reward/centered_abs_mean": 0.10140125602483749,
"signal/brier_reward/group_std_mean": 0.1296430230140686,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18235966563224792,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010140126198530197,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.018910757824778558,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02669762820005417,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.034101661667227744,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018910758662968874,
"signal/format_reward/centered_abs_mean": 0.005365668423473835,
"signal/format_reward/group_std_mean": 0.009850092232227325,
"signal/format_reward/group_zero_std_frac": 0.9611111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0482855424284935,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0026828342117369173,
"signal/frontier_coverage_0/centered_abs_mean": 0.1465074121952057,
"signal/frontier_coverage_0/group_std_mean": 0.18885864913463593,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03758770748972893,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020950559992343187,
"signal/frontier_coverage_1/centered_abs_mean": 0.1465074121952057,
"signal/frontier_coverage_1/group_std_mean": 0.18885864913463593,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03758770748972893,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020950559992343187,
"signal/frontier_coverage_10/centered_abs_mean": 0.061785966902971265,
"signal/frontier_coverage_10/group_std_mean": 0.07676958590745926,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015890642628073694,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008835393469780684,
"signal/frontier_coverage_15/centered_abs_mean": 0.08617945164442062,
"signal/frontier_coverage_15/group_std_mean": 0.10580342113971711,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02223038859665394,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012323661474511027,
"signal/frontier_coverage_20/centered_abs_mean": 0.1186860054731369,
"signal/frontier_coverage_20/group_std_mean": 0.1463834047317505,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030619293823838233,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001697209826670587,
"signal/frontier_coverage_25/centered_abs_mean": 0.15584073662757875,
"signal/frontier_coverage_25/group_std_mean": 0.1927361845970154,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.040186097472906114,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022285224869847296,
"signal/frontier_coverage_5/centered_abs_mean": 0.14527685940265656,
"signal/frontier_coverage_5/group_std_mean": 0.18733657896518707,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03727264627814293,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020774591015651823,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3365455687046051,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40074809789657595,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6073173880577087,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033654557168483736,
"step": 930
},
{
"calibration/aurc": 0.06769820110507842,
"calibration/batch_distribution_entropy": 0.9640017323999144,
"calibration/buffer_distribution_entropy": 0.9743043475159177,
"calibration/confidence_entropy": 0.48128372910858425,
"calibration/coverage@0%": 0.14768015951274746,
"calibration/coverage@1%": 0.2824490651713151,
"calibration/coverage@10%": 0.7675570611615644,
"calibration/coverage@15%": 0.8934676184560824,
"calibration/coverage@20%": 0.9355113762638668,
"calibration/coverage@25%": 0.9682958325730248,
"calibration/coverage@30%": 0.9947643979057592,
"calibration/coverage@5%": 0.5556909172947986,
"calibration/ece": 0.20573712648435638,
"calibration/mean_confidence": 0.5876263315088851,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004513888888888884,
"completions/max_length": 4076.6,
"completions/max_terminated_length": 4076.6,
"completions/mean_length": 1563.7186767578125,
"completions/mean_terminated_length": 1570.95537109375,
"completions/min_length": 100.6,
"completions/min_terminated_length": 421.8,
"epoch": 2.2471969100386247,
"grad_norm": 0.002489317674189806,
"learning_rate": 6.310096153846154e-07,
"loss": -0.0074,
"num_tokens": 2747662654.0,
"reward": 1.0286352634429932,
"reward_std": 0.0958052396774292,
"rewards/accuracy_reward": 0.7419270753860474,
"rewards/brier_reward": 0.8244329452514648,
"rewards/confidence_uniqueness_reward": 0.9473481178283691,
"rewards/format_reward": 0.9954861164093017,
"rewards/frontier_coverage_0": 0.0055387676926329735,
"rewards/frontier_coverage_1": 0.0055387676926329735,
"rewards/frontier_coverage_10": 0.061582712829113005,
"rewards/frontier_coverage_15": 0.13054971098899842,
"rewards/frontier_coverage_20": 0.2142077714204788,
"rewards/frontier_coverage_25": 0.30671278238296507,
"rewards/frontier_coverage_5": 0.006274393014609814,
"rewards/frontier_entropy_batch_reward": -0.27694271206855775,
"signal/accuracy_reward/centered_abs_mean": 0.09981011301279068,
"signal/accuracy_reward/group_std_mean": 0.1391371890902519,
"signal/accuracy_reward/group_zero_std_frac": 0.5805555582046509,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8497473239898682,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04990505650639534,
"signal/advantage_abs_mean": 0.7577031970024108,
"signal/advantage_pre_scale_abs_mean": 0.07107750698924065,
"signal/advantage_pre_scale_std": 0.12123489528894424,
"signal/advantage_std": 0.9827822804450989,
"signal/brier_reward/centered_abs_mean": 0.10653214752674103,
"signal/brier_reward/group_std_mean": 0.13699764758348465,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18180096745491028,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010653214715421199,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020202530920505522,
"signal/confidence_uniqueness_reward/group_std_mean": 0.028109391033649445,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03422162234783173,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020202531712129714,
"signal/format_reward/centered_abs_mean": 0.007389322947710752,
"signal/format_reward/group_std_mean": 0.01228577308356762,
"signal/format_reward/group_zero_std_frac": 0.9555555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06149484626948833,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.003694661473855376,
"signal/frontier_coverage_0/centered_abs_mean": 0.15176095068454742,
"signal/frontier_coverage_0/group_std_mean": 0.19587263464927673,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03702950105071068,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021701816003769636,
"signal/frontier_coverage_1/centered_abs_mean": 0.15176095068454742,
"signal/frontier_coverage_1/group_std_mean": 0.19587263464927673,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03702950105071068,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021701816003769636,
"signal/frontier_coverage_10/centered_abs_mean": 0.06348835378885269,
"signal/frontier_coverage_10/group_std_mean": 0.07822826206684112,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015514366328716278,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009078834671527147,
"signal/frontier_coverage_15/centered_abs_mean": 0.08382693082094192,
"signal/frontier_coverage_15/group_std_mean": 0.10365720987319946,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.020487995445728303,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011987250996753573,
"signal/frontier_coverage_20/centered_abs_mean": 0.11406213641166688,
"signal/frontier_coverage_20/group_std_mean": 0.14230601191520692,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027876751869916915,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016310885082930326,
"signal/frontier_coverage_25/centered_abs_mean": 0.14945789575576782,
"signal/frontier_coverage_25/group_std_mean": 0.18777556121349334,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03652668297290802,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00213724784553051,
"signal/frontier_coverage_5/centered_abs_mean": 0.15007020831108092,
"signal/frontier_coverage_5/group_std_mean": 0.19369837045669555,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0366180919110775,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002146004047244787,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.328479528427124,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3947932004928589,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5613398909568786,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03284795358777046,
"step": 935
},
{
"calibration/aurc": 0.09320443429342211,
"calibration/batch_distribution_entropy": 0.9289924908833758,
"calibration/buffer_distribution_entropy": 0.974310713124229,
"calibration/confidence_entropy": 0.48827981140169613,
"calibration/coverage@0%": 0.2713541666666667,
"calibration/coverage@1%": 0.34531249999999997,
"calibration/coverage@10%": 0.6015625,
"calibration/coverage@15%": 0.7182291666666667,
"calibration/coverage@20%": 0.8119791666666666,
"calibration/coverage@25%": 0.9072916666666666,
"calibration/coverage@30%": 0.9635416666666667,
"calibration/coverage@5%": 0.5192708333333333,
"calibration/ece": 0.15531255208333333,
"calibration/mean_confidence": 0.6371385937499999,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0021701388888888838,
"completions/max_length": 3889.4,
"completions/max_terminated_length": 3889.4,
"completions/mean_length": 1514.1761474609375,
"completions/mean_terminated_length": 1517.495166015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 384.8,
"epoch": 2.2591967600404996,
"grad_norm": 0.002634695265442133,
"learning_rate": 6.009615384615385e-07,
"loss": -0.0003,
"num_tokens": 2768192267.0,
"reward": 1.0425429582595824,
"reward_std": 0.09554083198308945,
"rewards/accuracy_reward": 0.7733506917953491,
"rewards/brier_reward": 0.8325282812118531,
"rewards/confidence_uniqueness_reward": 0.9469174981117249,
"rewards/format_reward": 0.9979166626930237,
"rewards/frontier_coverage_0": -0.013554162811487913,
"rewards/frontier_coverage_1": -0.013554162811487913,
"rewards/frontier_coverage_10": 0.06425249055027962,
"rewards/frontier_coverage_15": 0.13952557295560836,
"rewards/frontier_coverage_20": 0.22933673560619355,
"rewards/frontier_coverage_25": 0.3282873511314392,
"rewards/frontier_coverage_5": -0.012855465337634087,
"rewards/frontier_entropy_batch_reward": -0.313518762588501,
"signal/accuracy_reward/centered_abs_mean": 0.10990125685930252,
"signal/accuracy_reward/group_std_mean": 0.1430188611149788,
"signal/accuracy_reward/group_zero_std_frac": 0.5972222208976745,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9571273446083068,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05495062842965126,
"signal/advantage_abs_mean": 0.7779755830764771,
"signal/advantage_pre_scale_abs_mean": 0.07414216324687004,
"signal/advantage_pre_scale_std": 0.12053094506263733,
"signal/advantage_std": 0.9827156066894531,
"signal/brier_reward/centered_abs_mean": 0.10234658420085907,
"signal/brier_reward/group_std_mean": 0.13204047679901124,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18013457655906678,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010234658606350422,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01755792982876301,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02490374743938446,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.031031015142798424,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017557929968461394,
"signal/format_reward/centered_abs_mean": 0.0038302951259538533,
"signal/format_reward/group_std_mean": 0.007910448359325528,
"signal/format_reward/group_zero_std_frac": 0.9666666626930237,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0327956123277545,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0019151475629769267,
"signal/frontier_coverage_0/centered_abs_mean": 0.14841342717409134,
"signal/frontier_coverage_0/group_std_mean": 0.19171408116817473,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03722411021590233,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021223119460046292,
"signal/frontier_coverage_1/centered_abs_mean": 0.14841342717409134,
"signal/frontier_coverage_1/group_std_mean": 0.19171408116817473,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03722411021590233,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021223119460046292,
"signal/frontier_coverage_10/centered_abs_mean": 0.06261468380689621,
"signal/frontier_coverage_10/group_std_mean": 0.07751820534467697,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015907155349850655,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008953900076448917,
"signal/frontier_coverage_15/centered_abs_mean": 0.0870408520102501,
"signal/frontier_coverage_15/group_std_mean": 0.10760061740875244,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.022193774580955505,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012446841225028037,
"signal/frontier_coverage_20/centered_abs_mean": 0.12070697844028473,
"signal/frontier_coverage_20/group_std_mean": 0.1497057557106018,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030737898126244546,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001726109767332673,
"signal/frontier_coverage_25/centered_abs_mean": 0.15899961888790132,
"signal/frontier_coverage_25/group_std_mean": 0.1976221203804016,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0404223270714283,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022736945655196907,
"signal/frontier_coverage_5/centered_abs_mean": 0.14664799571037293,
"signal/frontier_coverage_5/group_std_mean": 0.18951908648014068,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03677628450095653,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020970664452761413,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33282300233840945,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3974955141544342,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5917717456817627,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03328230008482933,
"step": 940
},
{
"calibration/aurc": 0.08682412860157046,
"calibration/batch_distribution_entropy": 0.952244168172321,
"calibration/buffer_distribution_entropy": 0.9739697354595019,
"calibration/confidence_entropy": 0.48189116386131464,
"calibration/coverage@0%": 0.057291666666666664,
"calibration/coverage@1%": 0.109375,
"calibration/coverage@10%": 0.7216043307086615,
"calibration/coverage@15%": 0.7852403215223097,
"calibration/coverage@20%": 0.8796382874015748,
"calibration/coverage@25%": 0.9448818897637796,
"calibration/coverage@30%": 0.9616797900262467,
"calibration/coverage@5%": 0.5338459645669291,
"calibration/ece": 0.1673374657562336,
"calibration/mean_confidence": 0.5954183577345801,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.001388888888888906,
"completions/max_length": 3953.4,
"completions/max_terminated_length": 3953.4,
"completions/mean_length": 1553.2009521484374,
"completions/mean_terminated_length": 1555.3579345703124,
"completions/min_length": 203.0,
"completions/min_terminated_length": 542.6,
"epoch": 2.2711966100423746,
"grad_norm": 0.0025092982687056065,
"learning_rate": 5.709134615384615e-07,
"loss": 0.0032,
"num_tokens": 2789206870.0,
"reward": 1.0252916812896729,
"reward_std": 0.10383205115795135,
"rewards/accuracy_reward": 0.7349826335906983,
"rewards/brier_reward": 0.8226475954055786,
"rewards/confidence_uniqueness_reward": 0.9491451501846313,
"rewards/format_reward": 0.9986111164093018,
"rewards/frontier_coverage_0": 0.002067159628495574,
"rewards/frontier_coverage_1": 0.002067159628495574,
"rewards/frontier_coverage_10": 0.060900063067674634,
"rewards/frontier_coverage_15": 0.1282704308629036,
"rewards/frontier_coverage_20": 0.20991497933864595,
"rewards/frontier_coverage_25": 0.29933114647865294,
"rewards/frontier_coverage_5": 0.0027842882089316847,
"rewards/frontier_entropy_batch_reward": -0.2877081334590912,
"signal/accuracy_reward/centered_abs_mean": 0.12976887822151184,
"signal/accuracy_reward/group_std_mean": 0.16400441527366638,
"signal/accuracy_reward/group_zero_std_frac": 0.5527777910232544,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0821083426475524,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06488443911075592,
"signal/advantage_abs_mean": 0.7864845633506775,
"signal/advantage_pre_scale_abs_mean": 0.08278766125440598,
"signal/advantage_pre_scale_std": 0.12968444377183913,
"signal/advantage_std": 0.9828070282936097,
"signal/brier_reward/centered_abs_mean": 0.11329959332942963,
"signal/brier_reward/group_std_mean": 0.14356452822685242,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.189946448802948,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011329959891736508,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.016427206248044966,
"signal/confidence_uniqueness_reward/group_std_mean": 0.021708906069397925,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02749452255666256,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016427206806838512,
"signal/format_reward/centered_abs_mean": 0.002452256949618459,
"signal/format_reward/group_std_mean": 0.004412041790783405,
"signal/format_reward/group_zero_std_frac": 0.9833333253860473,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.019744722917675973,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0012261284748092294,
"signal/frontier_coverage_0/centered_abs_mean": 0.15941068530082703,
"signal/frontier_coverage_0/group_std_mean": 0.20225572884082793,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.038180924206972125,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002279572864063084,
"signal/frontier_coverage_1/centered_abs_mean": 0.15941068530082703,
"signal/frontier_coverage_1/group_std_mean": 0.20225572884082793,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.038180924206972125,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002279572864063084,
"signal/frontier_coverage_10/centered_abs_mean": 0.06408170610666275,
"signal/frontier_coverage_10/group_std_mean": 0.07931052595376968,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015410272032022476,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009163683629594743,
"signal/frontier_coverage_15/centered_abs_mean": 0.09135069847106933,
"signal/frontier_coverage_15/group_std_mean": 0.11312156021595002,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.021967886388301848,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013063149759545921,
"signal/frontier_coverage_20/centered_abs_mean": 0.12903558164834977,
"signal/frontier_coverage_20/group_std_mean": 0.15990031361579896,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03101343587040901,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018452089047059418,
"signal/frontier_coverage_25/centered_abs_mean": 0.17198951840400695,
"signal/frontier_coverage_25/group_std_mean": 0.21296056509017944,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0413208082318306,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00245945006608963,
"signal/frontier_coverage_5/centered_abs_mean": 0.15726107358932495,
"signal/frontier_coverage_5/group_std_mean": 0.19959587454795838,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03766716942191124,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022488333052024245,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3204162836074829,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38639105558395387,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5392665505409241,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032041627913713455,
"step": 945
},
{
"calibration/aurc": 0.12652919135947716,
"calibration/batch_distribution_entropy": 0.9568221850468424,
"calibration/buffer_distribution_entropy": 0.9745851992890928,
"calibration/confidence_entropy": 0.4825643832794569,
"calibration/coverage@0%": 0.05265019458052019,
"calibration/coverage@1%": 0.09431686124718687,
"calibration/coverage@10%": 0.39464082582382354,
"calibration/coverage@15%": 0.5999637537489063,
"calibration/coverage@20%": 0.8665215208277772,
"calibration/coverage@25%": 0.9445594362487075,
"calibration/coverage@30%": 0.9738683464223385,
"calibration/coverage@5%": 0.2785393316419391,
"calibration/ece": 0.18753364804086017,
"calibration/mean_confidence": 0.560442754265039,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003125,
"completions/max_length": 3963.4,
"completions/max_terminated_length": 3963.4,
"completions/mean_length": 1648.2306396484375,
"completions/mean_terminated_length": 1653.50927734375,
"completions/min_length": 81.2,
"completions/min_terminated_length": 439.8,
"epoch": 2.2831964600442496,
"grad_norm": 0.002521132817491889,
"learning_rate": 5.408653846153847e-07,
"loss": -0.0058,
"num_tokens": 2811319799.0,
"reward": 1.0242114305496215,
"reward_std": 0.10054776221513748,
"rewards/accuracy_reward": 0.7371527791023255,
"rewards/brier_reward": 0.8280610561370849,
"rewards/confidence_uniqueness_reward": 0.9465069651603699,
"rewards/format_reward": 0.996875,
"rewards/frontier_coverage_0": 0.01441353103145957,
"rewards/frontier_coverage_1": 0.01441353103145957,
"rewards/frontier_coverage_10": 0.06537414789199829,
"rewards/frontier_coverage_15": 0.13175831288099288,
"rewards/frontier_coverage_20": 0.21454677879810333,
"rewards/frontier_coverage_25": 0.30874282121658325,
"rewards/frontier_coverage_5": 0.015491097513586283,
"rewards/frontier_entropy_batch_reward": -0.31195072531700135,
"signal/accuracy_reward/centered_abs_mean": 0.11291232705116272,
"signal/accuracy_reward/group_std_mean": 0.15053357183933258,
"signal/accuracy_reward/group_zero_std_frac": 0.5638889014720917,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9593138813972473,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05645616352558136,
"signal/advantage_abs_mean": 0.7690176606178284,
"signal/advantage_pre_scale_abs_mean": 0.0772487387061119,
"signal/advantage_pre_scale_std": 0.1267082616686821,
"signal/advantage_std": 0.9827841877937317,
"signal/brier_reward/centered_abs_mean": 0.10452846437692642,
"signal/brier_reward/group_std_mean": 0.13697410225868226,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17854879796504974,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010452846810221673,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.018856966122984885,
"signal/confidence_uniqueness_reward/group_std_mean": 0.026795653998851775,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.032309388369321825,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018856966169551014,
"signal/format_reward/centered_abs_mean": 0.005110677098855376,
"signal/format_reward/group_std_mean": 0.009960832260549068,
"signal/format_reward/group_zero_std_frac": 0.9555555701255798,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.04381188787519932,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.002555338549427688,
"signal/frontier_coverage_0/centered_abs_mean": 0.14766640961170197,
"signal/frontier_coverage_0/group_std_mean": 0.19499149024486542,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036027568578720096,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021116294898092746,
"signal/frontier_coverage_1/centered_abs_mean": 0.14766640961170197,
"signal/frontier_coverage_1/group_std_mean": 0.19499149024486542,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036027568578720096,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021116294898092746,
"signal/frontier_coverage_10/centered_abs_mean": 0.06306469812989235,
"signal/frontier_coverage_10/group_std_mean": 0.07838996946811676,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015405329130589961,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009018251672387123,
"signal/frontier_coverage_15/centered_abs_mean": 0.0873841717839241,
"signal/frontier_coverage_15/group_std_mean": 0.10764139890670776,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.021306929364800452,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012495935894548892,
"signal/frontier_coverage_20/centered_abs_mean": 0.12113028168678283,
"signal/frontier_coverage_20/group_std_mean": 0.15000077486038207,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02950175330042839,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017321630381047725,
"signal/frontier_coverage_25/centered_abs_mean": 0.16070158481597902,
"signal/frontier_coverage_25/group_std_mean": 0.19954589903354644,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03912241980433464,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022980326786637304,
"signal/frontier_coverage_5/centered_abs_mean": 0.1454429507255554,
"signal/frontier_coverage_5/group_std_mean": 0.1921371579170227,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03548334017395973,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002079833997413516,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33761860728263854,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4023285984992981,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5767180442810058,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03376186117529869,
"step": 950
},
{
"epoch": 2.2831964600442496,
"eval_calibration/aurc": 0.13675649961578237,
"eval_calibration/batch_distribution_entropy": 0.9091355165127851,
"eval_calibration/buffer_distribution_entropy": 0.97407149477294,
"eval_calibration/confidence_entropy": 0.4783477271683041,
"eval_calibration/coverage@0%": 0.23454301075268816,
"eval_calibration/coverage@1%": 0.23454301075268816,
"eval_calibration/coverage@10%": 0.4637096774193548,
"eval_calibration/coverage@15%": 0.6001344086021505,
"eval_calibration/coverage@20%": 0.8099798387096774,
"eval_calibration/coverage@25%": 0.862231182795699,
"eval_calibration/coverage@30%": 0.984375,
"eval_calibration/coverage@5%": 0.3387096774193548,
"eval_calibration/ece": 0.21853504704301074,
"eval_calibration/mean_confidence": 0.5743662970430107,
"eval_completions/clipped_ratio": 0.00347222222222221,
"eval_completions/max_length": 3536.0,
"eval_completions/max_terminated_length": 3536.0,
"eval_completions/mean_length": 1604.8047281901042,
"eval_completions/mean_terminated_length": 1610.3856201171875,
"eval_completions/min_length": 421.5,
"eval_completions/min_terminated_length": 636.0,
"eval_loss": 0.0,
"eval_num_tokens": 2811319799.0,
"eval_reward": 0.9298228621482849,
"eval_reward_std": 0.23609469334284464,
"eval_rewards/accuracy_reward": 0.6953125,
"eval_rewards/brier_reward": 0.8285978237787882,
"eval_rewards/confidence_uniqueness_reward": 0.8950509230295817,
"eval_rewards/format_reward": 0.995659718910853,
"eval_rewards/frontier_coverage_0": 0.04206457252924641,
"eval_rewards/frontier_coverage_1": 0.04206457252924641,
"eval_rewards/frontier_coverage_10": 0.0667329914867878,
"eval_rewards/frontier_coverage_15": 0.12518454591433206,
"eval_rewards/frontier_coverage_20": 0.20115434378385544,
"eval_rewards/frontier_coverage_25": 0.2872895747423172,
"eval_rewards/frontier_coverage_5": 0.04235087055712938,
"eval_rewards/frontier_entropy_batch_reward": -0.995659718910853,
"eval_runtime": 206.1535,
"eval_samples_per_second": 4.851,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4056532184282939,
"eval_signal/accuracy_reward/group_std_mean": 0.4547837922970454,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8753860890865326,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20282660921414694,
"eval_signal/advantage_abs_mean": 0.8655439913272858,
"eval_signal/advantage_pre_scale_abs_mean": 0.20619056125481924,
"eval_signal/advantage_pre_scale_std": 0.2349847455819448,
"eval_signal/advantage_std": 0.9863970478375753,
"eval_signal/brier_reward/centered_abs_mean": 0.1640371655424436,
"eval_signal/brier_reward/group_std_mean": 0.22055867314338684,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07109572117527325,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.016403717764963705,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.045909797151883446,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06451402107874553,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01983573194593191,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004590979932496945,
"eval_signal/format_reward/centered_abs_mean": 0.008300781094779571,
"eval_signal/format_reward/group_std_mean": 0.021562910017867882,
"eval_signal/format_reward/group_zero_std_frac": 0.8888889054457346,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.017497866414487362,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.004150390547389786,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.2689051379760106,
"eval_signal/frontier_coverage_0/group_std_mean": 0.3698471784591675,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.016674190914879244,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0038453434826806188,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.2689051379760106,
"eval_signal/frontier_coverage_1/group_std_mean": 0.3698471784591675,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.016674190914879244,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038453434826806188,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.08195547511180241,
"eval_signal/frontier_coverage_10/group_std_mean": 0.10345051437616348,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005070453975349665,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00117196326997752,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.14475023746490479,
"eval_signal/frontier_coverage_15/group_std_mean": 0.18270048995812735,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.008945515534530083,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002069928372899691,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.23411591102679571,
"eval_signal/frontier_coverage_20/group_std_mean": 0.2887367457151413,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.014464873975763718,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003347857428404192,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.3328509529431661,
"eval_signal/frontier_coverage_25/group_std_mean": 0.40653078258037567,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02055966140081485,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004759768722578883,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2655777111649513,
"eval_signal/frontier_coverage_5/group_std_mean": 0.36574364205201465,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.016468066566934187,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037977612810209394,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.008300781094779571,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.021562910017867882,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8888889054457346,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0034995736399044595,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0008300781288805107,
"eval_steps_per_second": 0.029,
"step": 950
},
{
"epoch": 2.2831964600442496,
"step": 950,
"train_probe_calibration/aurc": 0.07581074120148544,
"train_probe_calibration/batch_distribution_entropy": 0.899964335250737,
"train_probe_calibration/buffer_distribution_entropy": 0.9737900708282261,
"train_probe_calibration/confidence_entropy": 0.4739935910661874,
"train_probe_calibration/coverage@0%": 0.4527889784946237,
"train_probe_calibration/coverage@1%": 0.4527889784946237,
"train_probe_calibration/coverage@10%": 0.7192540322580645,
"train_probe_calibration/coverage@15%": 0.8946572580645161,
"train_probe_calibration/coverage@20%": 0.962869623655914,
"train_probe_calibration/coverage@25%": 0.989247311827957,
"train_probe_calibration/coverage@30%": 1.0,
"train_probe_calibration/coverage@5%": 0.5885416666666666,
"train_probe_calibration/ece": 0.213778746639785,
"train_probe_calibration/mean_confidence": 0.6232134240591398,
"train_probe_completions/clipped_ratio": 0.007638888888888917,
"train_probe_completions/max_length": 3633.5,
"train_probe_completions/max_terminated_length": 3633.5,
"train_probe_completions/mean_length": 1592.7953491210938,
"train_probe_completions/mean_terminated_length": 1604.8572794596355,
"train_probe_completions/min_length": 89.0,
"train_probe_completions/min_terminated_length": 551.3333333333334,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 2811319799.0,
"train_probe_reward": 0.9637367725372314,
"train_probe_reward_std": 0.2208165650566419,
"train_probe_rewards/accuracy_reward": 0.761284718910853,
"train_probe_rewards/brier_reward": 0.8417325516541799,
"train_probe_rewards/confidence_uniqueness_reward": 0.8906635443369547,
"train_probe_rewards/format_reward": 0.9947916766007742,
"train_probe_rewards/frontier_coverage_0": 0.005254412613188227,
"train_probe_rewards/frontier_coverage_1": 0.005254412613188227,
"train_probe_rewards/frontier_coverage_10": 0.07371875147024791,
"train_probe_rewards/frontier_coverage_15": 0.15096323440472284,
"train_probe_rewards/frontier_coverage_20": 0.24458193282286325,
"train_probe_rewards/frontier_coverage_25": 0.3490742842356364,
"train_probe_rewards/frontier_coverage_5": 0.005987585289403796,
"train_probe_rewards/frontier_entropy_batch_reward": -0.9947916766007742,
"train_probe_runtime": 217.635,
"train_probe_samples_per_second": 4.595,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.3530273387829463,
"train_probe_signal/accuracy_reward/group_std_mean": 0.4242842694123586,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8170821766058604,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.17651366939147314,
"train_probe_signal/advantage_abs_mean": 0.8060585856437683,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.18002791702747345,
"train_probe_signal/advantage_pre_scale_std": 0.22036000341176987,
"train_probe_signal/advantage_std": 0.986365924278895,
"train_probe_signal/brier_reward/centered_abs_mean": 0.15322893857955933,
"train_probe_signal/brier_reward/group_std_mean": 0.2087702974677086,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07108517860372861,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.015322894168396791,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04701675598820051,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.06953836977481842,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021755116681257885,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0047016756143420935,
"train_probe_signal/format_reward/centered_abs_mean": 0.010091145522892475,
"train_probe_signal/format_reward/group_std_mean": 0.02946278266608715,
"train_probe_signal/format_reward/group_zero_std_frac": 0.8333333631356558,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.022791087937851746,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0050455727614462376,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.23706353455781937,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.34765902161598206,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01574373881643017,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003390008544859787,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.23706353455781937,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.34765902161598206,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01574373881643017,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003390008544859787,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.08195397506157558,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.10161611934502919,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0054444929119199514,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00117194183015575,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.1490315372745196,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.18034624059995016,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00989202270284295,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021311509578178325,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.2324892282485962,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.2789619415998459,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015423213442166647,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003324596017288665,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.321807121237119,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.38699104885260266,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.021341119272013504,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0046018418700744705,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.23428418238957724,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.3439280440409978,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.015559157667060694,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033502636554961405,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.010091145522892475,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.02946278266608715,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8333333631356558,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.004558217866967122,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0010091146104969084,
"train_probe_steps_per_second": 0.028
},
{
"calibration/aurc": 0.14273125825617144,
"calibration/batch_distribution_entropy": 0.9550349669352135,
"calibration/buffer_distribution_entropy": 0.9742140739784955,
"calibration/confidence_entropy": 0.494557200447038,
"calibration/coverage@0%": 0.13072916666666667,
"calibration/coverage@1%": 0.2260416666666667,
"calibration/coverage@10%": 0.4385416666666666,
"calibration/coverage@15%": 0.5364583333333334,
"calibration/coverage@20%": 0.63125,
"calibration/coverage@25%": 0.8817708333333334,
"calibration/coverage@30%": 0.9203125,
"calibration/coverage@5%": 0.33489583333333334,
"calibration/ece": 0.19657210416666665,
"calibration/mean_confidence": 0.5674274791666667,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0032986111111110938,
"completions/max_length": 3983.8,
"completions/max_terminated_length": 3983.8,
"completions/mean_length": 1634.6527099609375,
"completions/mean_terminated_length": 1640.19443359375,
"completions/min_length": 180.2,
"completions/min_terminated_length": 544.4,
"epoch": 2.2951963100461246,
"grad_norm": 0.0024574222043156624,
"learning_rate": 5.108173076923077e-07,
"loss": -0.0033,
"num_tokens": 2833227638.0,
"reward": 1.0299370527267455,
"reward_std": 0.093611079454422,
"rewards/accuracy_reward": 0.7506944417953492,
"rewards/brier_reward": 0.8379656314849854,
"rewards/confidence_uniqueness_reward": 0.9445121049880981,
"rewards/format_reward": 0.9967013835906983,
"rewards/frontier_coverage_0": 0.011254264181479812,
"rewards/frontier_coverage_1": 0.011254264181479812,
"rewards/frontier_coverage_10": 0.06909877061843872,
"rewards/frontier_coverage_15": 0.14030956625938415,
"rewards/frontier_coverage_20": 0.22847531437873841,
"rewards/frontier_coverage_25": 0.32808240652084353,
"rewards/frontier_coverage_5": 0.011783378524705767,
"rewards/frontier_entropy_batch_reward": -0.3345234453678131,
"signal/accuracy_reward/centered_abs_mean": 0.10070529580116272,
"signal/accuracy_reward/group_std_mean": 0.139546899497509,
"signal/accuracy_reward/group_zero_std_frac": 0.5805555820465088,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8801249861717224,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05035264790058136,
"signal/advantage_abs_mean": 0.7685187101364136,
"signal/advantage_pre_scale_abs_mean": 0.07071012929081917,
"signal/advantage_pre_scale_std": 0.11747289299964905,
"signal/advantage_std": 0.9827180862426758,
"signal/brier_reward/centered_abs_mean": 0.1026095524430275,
"signal/brier_reward/group_std_mean": 0.13352414667606355,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18136341571807862,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010260955616831779,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019099758937954903,
"signal/confidence_uniqueness_reward/group_std_mean": 0.026142006739974022,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.033878518640995024,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019099759869277478,
"signal/format_reward/centered_abs_mean": 0.004763454850763083,
"signal/format_reward/group_std_mean": 0.00826217420399189,
"signal/format_reward/group_zero_std_frac": 0.9666666626930237,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.042343306541442874,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0023817274253815413,
"signal/frontier_coverage_0/centered_abs_mean": 0.14085240364074708,
"signal/frontier_coverage_0/group_std_mean": 0.18698779344558716,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03563583679497242,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020141893532127143,
"signal/frontier_coverage_1/centered_abs_mean": 0.14085240364074708,
"signal/frontier_coverage_1/group_std_mean": 0.18698779344558716,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03563583679497242,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020141893532127143,
"signal/frontier_coverage_10/centered_abs_mean": 0.06276597455143929,
"signal/frontier_coverage_10/group_std_mean": 0.07800111174583435,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01591112706810236,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008975534467026592,
"signal/frontier_coverage_15/centered_abs_mean": 0.09101367890834808,
"signal/frontier_coverage_15/group_std_mean": 0.11218550503253936,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.023008933290839195,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013014955911785364,
"signal/frontier_coverage_20/centered_abs_mean": 0.12615538388490677,
"signal/frontier_coverage_20/group_std_mean": 0.15606312751770018,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03185085244476795,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018040220718830823,
"signal/frontier_coverage_25/centered_abs_mean": 0.1652356654405594,
"signal/frontier_coverage_25/group_std_mean": 0.205272775888443,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04169749319553375,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002362869819626212,
"signal/frontier_coverage_5/centered_abs_mean": 0.13930575549602509,
"signal/frontier_coverage_5/group_std_mean": 0.18501150012016296,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0352470863610506,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019920722115784883,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33024551868438723,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39427871704101564,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5849268555641174,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03302455134689808,
"step": 955
},
{
"calibration/aurc": 0.1296094462033201,
"calibration/batch_distribution_entropy": 0.9691447113783008,
"calibration/buffer_distribution_entropy": 0.9751247270327952,
"calibration/confidence_entropy": 0.4844039802407488,
"calibration/coverage@0%": 0.190625,
"calibration/coverage@1%": 0.3703125,
"calibration/coverage@10%": 0.5713541666666666,
"calibration/coverage@15%": 0.6640625,
"calibration/coverage@20%": 0.7374999999999999,
"calibration/coverage@25%": 0.7828125000000001,
"calibration/coverage@30%": 0.8005208333333332,
"calibration/coverage@5%": 0.484375,
"calibration/ece": 0.1975291145833334,
"calibration/mean_confidence": 0.5528219270833333,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.002517361111111138,
"completions/max_length": 4036.2,
"completions/max_terminated_length": 4036.2,
"completions/mean_length": 1708.0447265625,
"completions/mean_terminated_length": 1712.33408203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 578.2,
"epoch": 2.3071961600479995,
"grad_norm": 0.0025664744898676872,
"learning_rate": 4.807692307692308e-07,
"loss": -0.0059,
"num_tokens": 2855985721.0,
"reward": 1.0242048621177673,
"reward_std": 0.1011396512389183,
"rewards/accuracy_reward": 0.7341145873069763,
"rewards/brier_reward": 0.8207546234130859,
"rewards/confidence_uniqueness_reward": 0.9482064723968506,
"rewards/format_reward": 0.997569453716278,
"rewards/frontier_coverage_0": 0.005981969460844993,
"rewards/frontier_coverage_1": 0.005981969460844993,
"rewards/frontier_coverage_10": 0.06367864459753036,
"rewards/frontier_coverage_15": 0.12802914083003997,
"rewards/frontier_coverage_20": 0.2087888687849045,
"rewards/frontier_coverage_25": 0.3009022116661072,
"rewards/frontier_coverage_5": 0.0064484432339668276,
"rewards/frontier_entropy_batch_reward": -0.28826587498188017,
"signal/accuracy_reward/centered_abs_mean": 0.11920030564069747,
"signal/accuracy_reward/group_std_mean": 0.15845068097114562,
"signal/accuracy_reward/group_zero_std_frac": 0.5416666805744171,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9978924989700317,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05960015282034874,
"signal/advantage_abs_mean": 0.7690481185913086,
"signal/advantage_pre_scale_abs_mean": 0.07742958068847657,
"signal/advantage_pre_scale_std": 0.12594334036111832,
"signal/advantage_std": 0.9828057885169983,
"signal/brier_reward/centered_abs_mean": 0.11113527119159698,
"signal/brier_reward/group_std_mean": 0.14212769269943237,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1862643241882324,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011113526858389377,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01761804409325123,
"signal/confidence_uniqueness_reward/group_std_mean": 0.025302357226610183,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029750457778573037,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017618043581023811,
"signal/format_reward/centered_abs_mean": 0.004372829792555421,
"signal/format_reward/group_std_mean": 0.008926727809011936,
"signal/format_reward/group_zero_std_frac": 0.9611111044883728,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.036962130852043626,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0021864148962777107,
"signal/frontier_coverage_0/centered_abs_mean": 0.16451604068279266,
"signal/frontier_coverage_0/group_std_mean": 0.21193538308143617,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.039420148730278014,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023525793571025134,
"signal/frontier_coverage_1/centered_abs_mean": 0.16451604068279266,
"signal/frontier_coverage_1/group_std_mean": 0.21193538308143617,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.039420148730278014,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023525793571025134,
"signal/frontier_coverage_10/centered_abs_mean": 0.06357394829392433,
"signal/frontier_coverage_10/group_std_mean": 0.07909071594476699,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015305314771831035,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009091074694879353,
"signal/frontier_coverage_15/centered_abs_mean": 0.08338246792554856,
"signal/frontier_coverage_15/group_std_mean": 0.10340235531330108,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02012072168290615,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011923692654818296,
"signal/frontier_coverage_20/centered_abs_mean": 0.11507630050182342,
"signal/frontier_coverage_20/group_std_mean": 0.14356274902820587,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027765774726867677,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016455910867080093,
"signal/frontier_coverage_25/centered_abs_mean": 0.1533314347267151,
"signal/frontier_coverage_25/group_std_mean": 0.1919599086046219,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03697655647993088,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00219263955950737,
"signal/frontier_coverage_5/centered_abs_mean": 0.16278515756130219,
"signal/frontier_coverage_5/group_std_mean": 0.20979312360286712,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0390064924955368,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023278276901692154,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31930898427963256,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38691142201423645,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5392434418201446,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03193089962005615,
"step": 960
},
{
"calibration/aurc": 0.14038244862449353,
"calibration/batch_distribution_entropy": 0.9546107782096881,
"calibration/batch_entropy_100bins": 0.9485610508453283,
"calibration/batch_entropy_10bins": 0.9546107782096881,
"calibration/batch_entropy_50bins": 0.9569943377115798,
"calibration/batch_uniqueness": 0.9503503874846292,
"calibration/confidence_entropy": 0.488693646883382,
"calibration/coverage@0%": 0.0385498687664042,
"calibration/coverage@1%": 0.0385498687664042,
"calibration/coverage@10%": 0.2287483595800525,
"calibration/coverage@15%": 0.6391199146981626,
"calibration/coverage@20%": 0.8946030183727034,
"calibration/coverage@25%": 0.9744627624671915,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.14584153543307088,
"calibration/distribution_entropy_10": 0.9546107782096881,
"calibration/distribution_entropy_100": 0.9485610508453283,
"calibration/ece": 0.18004387381069556,
"calibration/mean_confidence": 0.6024290353920604,
"calibration/unique_confidence_per_question": 0.7791666666666667,
"calibration/unique_confidences": 299.2,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0014756944444444641,
"completions/max_length": 3931.6,
"completions/max_terminated_length": 3931.6,
"completions/mean_length": 1719.3753662109375,
"completions/mean_terminated_length": 1721.9231201171874,
"completions/min_length": 0.0,
"completions/min_terminated_length": 465.6,
"epoch": 2.3191960100498745,
"grad_norm": 0.00238273898139596,
"learning_rate": 4.507211538461539e-07,
"loss": -0.0002,
"num_tokens": 2878891805.0,
"reward": 1.0996188402175904,
"reward_std": 0.09667720943689347,
"rewards/accuracy_reward": 0.7565104246139527,
"rewards/brier_reward": 0.8399082064628601,
"rewards/confidence_uniqueness_reward": 0.9473569512367248,
"rewards/format_reward": 0.9984375,
"rewards/frontier_coverage_0": 0.7498524904251098,
"rewards/frontier_coverage_1": 0.7498524904251098,
"rewards/frontier_coverage_10": 0.7498524904251098,
"rewards/frontier_coverage_15": 0.7498524904251098,
"rewards/frontier_coverage_20": 0.7498524904251098,
"rewards/frontier_coverage_25": 0.7498524904251098,
"rewards/frontier_coverage_5": 0.7498524904251098,
"rewards/frontier_entropy_batch_reward": -0.31641929149627684,
"signal/accuracy_reward/centered_abs_mean": 0.10496419221162796,
"signal/accuracy_reward/group_std_mean": 0.14212748557329177,
"signal/accuracy_reward/group_zero_std_frac": 0.5777777910232544,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8967318654060363,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05248209610581398,
"signal/advantage_abs_mean": 0.7712378382682801,
"signal/advantage_pre_scale_abs_mean": 0.07349895536899567,
"signal/advantage_pre_scale_std": 0.1213487908244133,
"signal/advantage_std": 0.9827686429023743,
"signal/brier_reward/centered_abs_mean": 0.10113995522260666,
"signal/brier_reward/group_std_mean": 0.1298495277762413,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17362068593502045,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01011399570852518,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.017154244333505632,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02381323203444481,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02954300418496132,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017154245171695948,
"signal/format_reward/centered_abs_mean": 0.002907986077480018,
"signal/format_reward/group_std_mean": 0.006289407191798091,
"signal/format_reward/group_zero_std_frac": 0.9722222208976745,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.024624919146299364,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001453993038740009,
"signal/frontier_coverage_0/centered_abs_mean": 0.14556353986263276,
"signal/frontier_coverage_0/group_std_mean": 0.18044343292713166,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03574709594249725,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002081558620557189,
"signal/frontier_coverage_1/centered_abs_mean": 0.14556353986263276,
"signal/frontier_coverage_1/group_std_mean": 0.18044343292713166,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03574709594249725,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002081558620557189,
"signal/frontier_coverage_10/centered_abs_mean": 0.14556353986263276,
"signal/frontier_coverage_10/group_std_mean": 0.18044343292713166,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03574709594249725,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002081558620557189,
"signal/frontier_coverage_15/centered_abs_mean": 0.14556353986263276,
"signal/frontier_coverage_15/group_std_mean": 0.18044343292713166,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03574709594249725,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002081558620557189,
"signal/frontier_coverage_20/centered_abs_mean": 0.14556353986263276,
"signal/frontier_coverage_20/group_std_mean": 0.18044343292713166,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03574709594249725,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002081558620557189,
"signal/frontier_coverage_25/centered_abs_mean": 0.14556353986263276,
"signal/frontier_coverage_25/group_std_mean": 0.18044343292713166,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03574709594249725,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002081558620557189,
"signal/frontier_coverage_5/centered_abs_mean": 0.14556353986263276,
"signal/frontier_coverage_5/group_std_mean": 0.18044343292713166,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03574709594249725,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002081558620557189,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32960216999053954,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3915234744548798,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5683565855026245,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03296021893620491,
"step": 965
},
{
"calibration/aurc": 0.11166700871292294,
"calibration/batch_distribution_entropy": 0.9278303770940528,
"calibration/batch_entropy_100bins": 0.9340343879809385,
"calibration/batch_entropy_10bins": 0.9278303770940528,
"calibration/batch_entropy_50bins": 0.9419337319119098,
"calibration/batch_uniqueness": 0.9451849405959892,
"calibration/confidence_entropy": 0.468146193171627,
"calibration/coverage@0%": 0.09965051131418624,
"calibration/coverage@1%": 0.25905814838990426,
"calibration/coverage@10%": 0.6087140992167102,
"calibration/coverage@15%": 0.6899926566579635,
"calibration/coverage@20%": 0.8051362597911227,
"calibration/coverage@25%": 0.9020833333333332,
"calibration/coverage@30%": 0.9364583333333334,
"calibration/coverage@5%": 0.35659812880765884,
"calibration/distribution_entropy_10": 0.9278303770940528,
"calibration/distribution_entropy_100": 0.9340343879809385,
"calibration/ece": 0.14054123814186253,
"calibration/mean_confidence": 0.6327841136858138,
"calibration/unique_confidence_per_question": 0.78125,
"calibration/unique_confidences": 300.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00546875,
"completions/max_length": 4001.6,
"completions/max_terminated_length": 4001.6,
"completions/mean_length": 1679.43603515625,
"completions/mean_terminated_length": 1688.64921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 607.6,
"epoch": 2.3311958600517495,
"grad_norm": 0.0024200736079365015,
"learning_rate": 4.20673076923077e-07,
"loss": -0.0136,
"num_tokens": 2901341820.0,
"reward": 1.0947325229644775,
"reward_std": 0.0987067922949791,
"rewards/accuracy_reward": 0.7580729126930237,
"rewards/brier_reward": 0.8338273406028748,
"rewards/confidence_uniqueness_reward": 0.9415804147720337,
"rewards/format_reward": 0.99453125,
"rewards/frontier_coverage_0": 0.7456173777580262,
"rewards/frontier_coverage_1": 0.7456173777580262,
"rewards/frontier_coverage_10": 0.7456173777580262,
"rewards/frontier_coverage_15": 0.7456173777580262,
"rewards/frontier_coverage_20": 0.7456173777580262,
"rewards/frontier_coverage_25": 0.7456173777580262,
"rewards/frontier_coverage_5": 0.7456173777580262,
"rewards/frontier_entropy_batch_reward": -0.3374656796455383,
"signal/accuracy_reward/centered_abs_mean": 0.10191514790058136,
"signal/accuracy_reward/group_std_mean": 0.13759705126285554,
"signal/accuracy_reward/group_zero_std_frac": 0.5944444537162781,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9039199709892273,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05095757395029068,
"signal/advantage_abs_mean": 0.7747997283935547,
"signal/advantage_pre_scale_abs_mean": 0.07487674057483673,
"signal/advantage_pre_scale_std": 0.12937503159046174,
"signal/advantage_std": 0.9827169299125671,
"signal/brier_reward/centered_abs_mean": 0.10510815382003784,
"signal/brier_reward/group_std_mean": 0.13565416336059571,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18632941842079162,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010510815307497978,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022095327824354173,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030155374109745024,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03923738077282905,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002209532866254449,
"signal/format_reward/centered_abs_mean": 0.00778537318110466,
"signal/format_reward/group_std_mean": 0.012528749741613865,
"signal/format_reward/group_zero_std_frac": 0.9555555701255798,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06878926306962967,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00389268659055233,
"signal/frontier_coverage_0/centered_abs_mean": 0.14680063724517822,
"signal/frontier_coverage_0/group_std_mean": 0.18313476145267488,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03721518889069557,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002099249139428139,
"signal/frontier_coverage_1/centered_abs_mean": 0.14680063724517822,
"signal/frontier_coverage_1/group_std_mean": 0.18313476145267488,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03721518889069557,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002099249139428139,
"signal/frontier_coverage_10/centered_abs_mean": 0.14680063724517822,
"signal/frontier_coverage_10/group_std_mean": 0.18313476145267488,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03721518889069557,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002099249139428139,
"signal/frontier_coverage_15/centered_abs_mean": 0.14680063724517822,
"signal/frontier_coverage_15/group_std_mean": 0.18313476145267488,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03721518889069557,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002099249139428139,
"signal/frontier_coverage_20/centered_abs_mean": 0.14680063724517822,
"signal/frontier_coverage_20/group_std_mean": 0.18313476145267488,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03721518889069557,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002099249139428139,
"signal/frontier_coverage_25/centered_abs_mean": 0.14680063724517822,
"signal/frontier_coverage_25/group_std_mean": 0.18313476145267488,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03721518889069557,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002099249139428139,
"signal/frontier_coverage_5/centered_abs_mean": 0.14680063724517822,
"signal/frontier_coverage_5/group_std_mean": 0.18313476145267488,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03721518889069557,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002099249139428139,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3281488955020905,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39026449918746947,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5830213069915772,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03281489051878452,
"step": 970
},
{
"calibration/aurc": 0.048921450858412416,
"calibration/batch_distribution_entropy": 0.9410208167580578,
"calibration/batch_entropy_100bins": 0.9465772863304492,
"calibration/batch_entropy_10bins": 0.9410208167580578,
"calibration/batch_entropy_50bins": 0.9525928211916714,
"calibration/batch_uniqueness": 0.9487509328672419,
"calibration/buffer_distribution_entropy": 0.9602970568035275,
"calibration/buffer_entropy_100bins": 0.9785693215386517,
"calibration/buffer_entropy_10bins": 0.9602970568035275,
"calibration/buffer_entropy_50bins": 0.9755793518739612,
"calibration/confidence_entropy": 0.47804082687512295,
"calibration/coverage@0%": 0.19668869669277633,
"calibration/coverage@1%": 0.3411730308964317,
"calibration/coverage@10%": 0.8179952676240209,
"calibration/coverage@15%": 0.898815546127067,
"calibration/coverage@20%": 0.9614066579634464,
"calibration/coverage@25%": 0.9921834203655353,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.7199507724107919,
"calibration/distribution_entropy_10": 0.9410208167580578,
"calibration/distribution_entropy_100": 0.9465772863304492,
"calibration/ece": 0.16123091737296644,
"calibration/mean_confidence": 0.6316424365869987,
"calibration/unique_confidence_per_question": 0.7776041666666667,
"calibration/unique_confidences": 298.6,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003125,
"completions/max_length": 4034.8,
"completions/max_terminated_length": 4034.8,
"completions/mean_length": 1692.332421875,
"completions/mean_terminated_length": 1697.679736328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 541.2,
"epoch": 2.3431957100536245,
"grad_norm": 0.002366076922044158,
"learning_rate": 3.90625e-07,
"loss": -0.0098,
"num_tokens": 2923899633.0,
"reward": 1.0969661951065064,
"reward_std": 0.09302805066108703,
"rewards/accuracy_reward": 0.7771701335906982,
"rewards/brier_reward": 0.8628766059875488,
"rewards/confidence_uniqueness_reward": 0.943256139755249,
"rewards/format_reward": 0.996875,
"rewards/frontier_coverage_0": 0.6225879438221454,
"rewards/frontier_coverage_1": 0.6225879438221454,
"rewards/frontier_coverage_10": 0.639605250954628,
"rewards/frontier_coverage_15": 0.6584436506032944,
"rewards/frontier_coverage_20": 0.6852690279483795,
"rewards/frontier_coverage_25": 0.7049791038036346,
"rewards/frontier_coverage_5": 0.6253482840955258,
"rewards/frontier_entropy_batch_reward": -0.35860825181007383,
"signal/accuracy_reward/centered_abs_mean": 0.0921603724360466,
"signal/accuracy_reward/group_std_mean": 0.12776372581720352,
"signal/accuracy_reward/group_zero_std_frac": 0.6138888835906983,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8368605613708496,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0460801862180233,
"signal/advantage_abs_mean": 0.7728834629058838,
"signal/advantage_pre_scale_abs_mean": 0.06947359591722488,
"signal/advantage_pre_scale_std": 0.12066877037286758,
"signal/advantage_std": 0.9826517581939698,
"signal/brier_reward/centered_abs_mean": 0.09350483268499374,
"signal/brier_reward/group_std_mean": 0.12106747925281525,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1714227616786957,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009350483864545822,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020120499655604362,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02889779768884182,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03697131425142288,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002012050012126565,
"signal/format_reward/centered_abs_mean": 0.005305989552289248,
"signal/format_reward/group_std_mean": 0.010686865262687206,
"signal/format_reward/group_zero_std_frac": 0.9527777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.048372025787830356,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.002652994776144624,
"signal/frontier_coverage_0/centered_abs_mean": 0.134758859872818,
"signal/frontier_coverage_0/group_std_mean": 0.16941750347614287,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03526972904801369,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019270517397671938,
"signal/frontier_coverage_1/centered_abs_mean": 0.134758859872818,
"signal/frontier_coverage_1/group_std_mean": 0.16941750347614287,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03526972904801369,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019270517397671938,
"signal/frontier_coverage_10/centered_abs_mean": 0.12533134520053862,
"signal/frontier_coverage_10/group_std_mean": 0.15608875453472137,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.032518448680639266,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017922382801771164,
"signal/frontier_coverage_15/centered_abs_mean": 0.13210234493017198,
"signal/frontier_coverage_15/group_std_mean": 0.16414720118045806,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03449446447193623,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018890635343268513,
"signal/frontier_coverage_20/centered_abs_mean": 0.14086658358573914,
"signal/frontier_coverage_20/group_std_mean": 0.17497550547122956,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03705217763781547,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020143922185525296,
"signal/frontier_coverage_25/centered_abs_mean": 0.14678715467453002,
"signal/frontier_coverage_25/group_std_mean": 0.182331645488739,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03878000974655151,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020990563789382577,
"signal/frontier_coverage_5/centered_abs_mean": 0.12934952080249787,
"signal/frontier_coverage_5/group_std_mean": 0.1625670924782753,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03369109369814396,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018496982054784894,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3399089515209198,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40003854036331177,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6255879998207092,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03399089723825455,
"step": 975
},
{
"calibration/aurc": 0.14298997154471915,
"calibration/batch_distribution_entropy": 0.9500791369676123,
"calibration/batch_entropy_100bins": 0.9463294097352252,
"calibration/batch_entropy_10bins": 0.9500791369676123,
"calibration/batch_entropy_50bins": 0.9563625427542449,
"calibration/batch_uniqueness": 0.9484823954659024,
"calibration/buffer_distribution_entropy": 0.9642788407621218,
"calibration/buffer_entropy_100bins": 0.9807640451660106,
"calibration/buffer_entropy_10bins": 0.9642788407621218,
"calibration/buffer_entropy_50bins": 0.978117026514248,
"calibration/confidence_entropy": 0.47567245891365834,
"calibration/coverage@0%": 0.10758928571428572,
"calibration/coverage@1%": 0.22373511904761903,
"calibration/coverage@10%": 0.40436507936507937,
"calibration/coverage@15%": 0.5170552248677248,
"calibration/coverage@20%": 0.6459656084656085,
"calibration/coverage@25%": 0.8712136243386244,
"calibration/coverage@30%": 0.9302910052910054,
"calibration/coverage@5%": 0.3556878306878307,
"calibration/distribution_entropy_10": 0.9500791369676123,
"calibration/distribution_entropy_100": 0.9463294097352252,
"calibration/ece": 0.15938840153769837,
"calibration/mean_confidence": 0.5839293696263227,
"calibration/unique_confidence_per_question": 0.7890625000000001,
"calibration/unique_confidences": 303.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003645833333333348,
"completions/max_length": 4007.8,
"completions/max_terminated_length": 4007.8,
"completions/mean_length": 1795.9944580078125,
"completions/mean_terminated_length": 1802.4992919921874,
"completions/min_length": 0.0,
"completions/min_terminated_length": 569.6,
"epoch": 2.3551955600554995,
"grad_norm": 0.0023362021893262863,
"learning_rate": 3.6057692307692306e-07,
"loss": -0.0079,
"num_tokens": 2947693457.0,
"reward": 1.024513053894043,
"reward_std": 0.09492753744125366,
"rewards/accuracy_reward": 0.7426215291023255,
"rewards/brier_reward": 0.8439577341079711,
"rewards/confidence_uniqueness_reward": 0.9420446038246155,
"rewards/format_reward": 0.9963541626930237,
"rewards/frontier_coverage_0": 0.01960038566030562,
"rewards/frontier_coverage_1": 0.01960038566030562,
"rewards/frontier_coverage_10": 0.0827787920832634,
"rewards/frontier_coverage_15": 0.15930041372776033,
"rewards/frontier_coverage_20": 0.2687810301780701,
"rewards/frontier_coverage_25": 0.3554062366485596,
"rewards/frontier_coverage_5": 0.022010414488613607,
"rewards/frontier_entropy_batch_reward": -0.36837931871414187,
"signal/accuracy_reward/centered_abs_mean": 0.09831271767616272,
"signal/accuracy_reward/group_std_mean": 0.13480441719293595,
"signal/accuracy_reward/group_zero_std_frac": 0.5944444537162781,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.873695683479309,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04915635883808136,
"signal/advantage_abs_mean": 0.7633371233940125,
"signal/advantage_pre_scale_abs_mean": 0.07063713744282722,
"signal/advantage_pre_scale_std": 0.12072601765394211,
"signal/advantage_std": 0.9826791763305665,
"signal/brier_reward/centered_abs_mean": 0.10119090974330902,
"signal/brier_reward/group_std_mean": 0.13194870799779893,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18225338459014892,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010119091346859932,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021581395342946053,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030861319229006767,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03918079622089863,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002158139576204121,
"signal/format_reward/centered_abs_mean": 0.006477864505723119,
"signal/format_reward/group_std_mean": 0.012327943369746209,
"signal/format_reward/group_zero_std_frac": 0.950000011920929,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05790370739996433,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0032389322528615593,
"signal/frontier_coverage_0/centered_abs_mean": 0.13417006731033326,
"signal/frontier_coverage_0/group_std_mean": 0.17341846227645874,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.034544138610363005,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019186319317668677,
"signal/frontier_coverage_1/centered_abs_mean": 0.13417006731033326,
"signal/frontier_coverage_1/group_std_mean": 0.17341846227645874,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.034544138610363005,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019186319317668677,
"signal/frontier_coverage_10/centered_abs_mean": 0.06774536669254302,
"signal/frontier_coverage_10/group_std_mean": 0.08314146995544433,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0175961634144187,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009687587735243142,
"signal/frontier_coverage_15/centered_abs_mean": 0.09616223573684693,
"signal/frontier_coverage_15/group_std_mean": 0.11858321875333785,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.024944596737623215,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013751199934631587,
"signal/frontier_coverage_20/centered_abs_mean": 0.13780849874019624,
"signal/frontier_coverage_20/group_std_mean": 0.17135028541088104,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.035683315992355344,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019706616178154944,
"signal/frontier_coverage_25/centered_abs_mean": 0.17244452834129334,
"signal/frontier_coverage_25/group_std_mean": 0.21541389226913452,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04460631459951401,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00246595679782331,
"signal/frontier_coverage_5/centered_abs_mean": 0.11228355765342712,
"signal/frontier_coverage_5/group_std_mean": 0.14591508209705353,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.028914512321352958,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016056548804044724,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33653807640075684,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3984680354595184,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6106127738952637,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03365381136536598,
"step": 980
},
{
"calibration/aurc": 0.0753109634174734,
"calibration/batch_distribution_entropy": 0.9480128859763612,
"calibration/batch_entropy_100bins": 0.9484348062409662,
"calibration/batch_entropy_10bins": 0.9480128859763612,
"calibration/batch_entropy_50bins": 0.9575373289371351,
"calibration/batch_uniqueness": 0.948753060265835,
"calibration/buffer_distribution_entropy": 0.9667386152705809,
"calibration/buffer_entropy_100bins": 0.9821239026124247,
"calibration/buffer_entropy_10bins": 0.9667386152705809,
"calibration/buffer_entropy_50bins": 0.9797207122682059,
"calibration/confidence_entropy": 0.4779945092334944,
"calibration/coverage@0%": 0.23802083333333335,
"calibration/coverage@1%": 0.2598958333333333,
"calibration/coverage@10%": 0.6843192449956484,
"calibration/coverage@15%": 0.886958768494343,
"calibration/coverage@20%": 0.934375,
"calibration/coverage@25%": 0.9619791666666668,
"calibration/coverage@30%": 0.9765625,
"calibration/coverage@5%": 0.5290279590948651,
"calibration/distribution_entropy_10": 0.9480128859763612,
"calibration/distribution_entropy_100": 0.9484348062409662,
"calibration/ece": 0.19405203231070495,
"calibration/mean_confidence": 0.6091119354873803,
"calibration/unique_confidence_per_question": 0.7791666666666666,
"calibration/unique_confidences": 299.2,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0026041666666666964,
"completions/max_length": 3950.8,
"completions/max_terminated_length": 3950.8,
"completions/mean_length": 1796.334765625,
"completions/mean_terminated_length": 1801.1148193359375,
"completions/min_length": 130.6,
"completions/min_terminated_length": 563.8,
"epoch": 2.3671954100573744,
"grad_norm": 0.0022584237158298492,
"learning_rate": 3.305288461538462e-07,
"loss": -0.0057,
"num_tokens": 2971492257.0,
"reward": 1.0601597785949708,
"reward_std": 0.08923779428005219,
"rewards/accuracy_reward": 0.8031249880790711,
"rewards/brier_reward": 0.8480790615081787,
"rewards/confidence_uniqueness_reward": 0.9452744245529174,
"rewards/format_reward": 0.9973958253860473,
"rewards/frontier_coverage_0": -0.015677616419270634,
"rewards/frontier_coverage_1": -0.015677616419270634,
"rewards/frontier_coverage_10": 0.08821566551923751,
"rewards/frontier_coverage_15": 0.17647169828414916,
"rewards/frontier_coverage_20": 0.2998010993003845,
"rewards/frontier_coverage_25": 0.39475311040878297,
"rewards/frontier_coverage_5": -0.002906990051269531,
"rewards/frontier_entropy_batch_reward": -0.32663238048553467,
"signal/accuracy_reward/centered_abs_mean": 0.09223090261220931,
"signal/accuracy_reward/group_std_mean": 0.12533488720655442,
"signal/accuracy_reward/group_zero_std_frac": 0.6194444537162781,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8646474599838256,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04611545130610466,
"signal/advantage_abs_mean": 0.7746138095855712,
"signal/advantage_pre_scale_abs_mean": 0.06815963685512542,
"signal/advantage_pre_scale_std": 0.11540376543998718,
"signal/advantage_std": 0.9826080322265625,
"signal/brier_reward/centered_abs_mean": 0.09722411036491393,
"signal/brier_reward/group_std_mean": 0.12465722560882568,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18319908082485198,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009722411073744297,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.018586510978639125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0264458317309618,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.034793031960725786,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018586511258035898,
"signal/format_reward/centered_abs_mean": 0.0044596354942768816,
"signal/format_reward/group_std_mean": 0.00908562783151865,
"signal/format_reward/group_zero_std_frac": 0.9583333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.04040036499500275,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0022298177471384408,
"signal/frontier_coverage_0/centered_abs_mean": 0.13336831480264663,
"signal/frontier_coverage_0/group_std_mean": 0.17140342593193053,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035881773382425305,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019071669783443212,
"signal/frontier_coverage_1/centered_abs_mean": 0.13336831480264663,
"signal/frontier_coverage_1/group_std_mean": 0.17140342593193053,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035881773382425305,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019071669783443212,
"signal/frontier_coverage_10/centered_abs_mean": 0.0660796619951725,
"signal/frontier_coverage_10/group_std_mean": 0.08096126317977906,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017853300645947458,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009449392091482878,
"signal/frontier_coverage_15/centered_abs_mean": 0.09556379914283752,
"signal/frontier_coverage_15/group_std_mean": 0.11702702194452286,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.025842766091227532,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013665623031556607,
"signal/frontier_coverage_20/centered_abs_mean": 0.13735153675079345,
"signal/frontier_coverage_20/group_std_mean": 0.1688213050365448,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03713957220315933,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001964126992970705,
"signal/frontier_coverage_25/centered_abs_mean": 0.16979779601097106,
"signal/frontier_coverage_25/group_std_mean": 0.2093061089515686,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04587937220931053,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002428108500316739,
"signal/frontier_coverage_5/centered_abs_mean": 0.11376264691352844,
"signal/frontier_coverage_5/group_std_mean": 0.14681884348392488,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030614623427391054,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016268058447167278,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3292146623134613,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39027782082557677,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6217843651771545,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03292146697640419,
"step": 985
},
{
"calibration/aurc": 0.058988793729192454,
"calibration/batch_distribution_entropy": 0.9364811635529179,
"calibration/batch_entropy_100bins": 0.9394896616791195,
"calibration/batch_entropy_10bins": 0.9364811635529179,
"calibration/batch_entropy_50bins": 0.9464707680021032,
"calibration/batch_uniqueness": 0.9467230902777777,
"calibration/buffer_distribution_entropy": 0.9667519942725857,
"calibration/buffer_entropy_100bins": 0.9823825593852069,
"calibration/buffer_entropy_10bins": 0.9667519942725857,
"calibration/buffer_entropy_50bins": 0.9798715333534596,
"calibration/confidence_entropy": 0.4910575180548623,
"calibration/coverage@0%": 0.24322916666666666,
"calibration/coverage@1%": 0.3567708333333333,
"calibration/coverage@10%": 0.8140625,
"calibration/coverage@15%": 0.8854166666666667,
"calibration/coverage@20%": 0.9416666666666668,
"calibration/coverage@25%": 0.9979166666666668,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.5692708333333333,
"calibration/distribution_entropy_10": 0.9364811635529179,
"calibration/distribution_entropy_100": 0.9394896616791195,
"calibration/ece": 0.20949534375,
"calibration/mean_confidence": 0.6143158645833333,
"calibration/unique_confidence_per_question": 0.7822916666666667,
"calibration/unique_confidences": 300.4,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.002777777777777746,
"completions/max_length": 4015.6,
"completions/max_terminated_length": 4015.6,
"completions/mean_length": 1833.0742431640624,
"completions/mean_terminated_length": 1838.1804443359374,
"completions/min_length": 0.0,
"completions/min_terminated_length": 760.0,
"epoch": 2.3791952600592494,
"grad_norm": 0.002287256298586726,
"learning_rate": 3.0048076923076924e-07,
"loss": -0.0074,
"num_tokens": 2995705528.0,
"reward": 1.0458185434341432,
"reward_std": 0.09111074954271317,
"rewards/accuracy_reward": 0.7794270873069763,
"rewards/brier_reward": 0.8258747100830078,
"rewards/confidence_uniqueness_reward": 0.9460436582565308,
"rewards/format_reward": 0.9972222208976745,
"rewards/frontier_coverage_0": -0.020203251019120218,
"rewards/frontier_coverage_1": -0.020203251019120218,
"rewards/frontier_coverage_10": 0.08284911960363388,
"rewards/frontier_coverage_15": 0.17000916302204133,
"rewards/frontier_coverage_20": 0.2834066033363342,
"rewards/frontier_coverage_25": 0.35660980343818666,
"rewards/frontier_coverage_5": -0.003650544723495841,
"rewards/frontier_entropy_batch_reward": -0.3183602750301361,
"signal/accuracy_reward/centered_abs_mean": 0.0955023854970932,
"signal/accuracy_reward/group_std_mean": 0.13166273087263108,
"signal/accuracy_reward/group_zero_std_frac": 0.600000011920929,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8537281632423401,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0477511927485466,
"signal/advantage_abs_mean": 0.7701890587806701,
"signal/advantage_pre_scale_abs_mean": 0.06861250698566437,
"signal/advantage_pre_scale_std": 0.11559868305921554,
"signal/advantage_std": 0.9826963424682618,
"signal/brier_reward/centered_abs_mean": 0.10571289658546448,
"signal/brier_reward/group_std_mean": 0.13531849682331085,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18983431458473204,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010571289993822574,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01879030391573906,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0264260970056057,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03378798738121987,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018790304427966475,
"signal/format_reward/centered_abs_mean": 0.004947916697710752,
"signal/format_reward/group_std_mean": 0.009209575690329075,
"signal/format_reward/group_zero_std_frac": 0.9638888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0445366695523262,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.002473958348855376,
"signal/frontier_coverage_0/centered_abs_mean": 0.14547624588012695,
"signal/frontier_coverage_0/group_std_mean": 0.18774136900901794,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03729048147797585,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020803103223443033,
"signal/frontier_coverage_1/centered_abs_mean": 0.14547624588012695,
"signal/frontier_coverage_1/group_std_mean": 0.18774136900901794,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03729048147797585,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020803103223443033,
"signal/frontier_coverage_10/centered_abs_mean": 0.06864608377218247,
"signal/frontier_coverage_10/group_std_mean": 0.08440887182950974,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01762332357466221,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009816389763727784,
"signal/frontier_coverage_15/centered_abs_mean": 0.09738789051771164,
"signal/frontier_coverage_15/group_std_mean": 0.12021335512399674,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02502138651907444,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001392646855674684,
"signal/frontier_coverage_20/centered_abs_mean": 0.13724444508552552,
"signal/frontier_coverage_20/group_std_mean": 0.1709604889154434,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03526832312345505,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019625954795628787,
"signal/frontier_coverage_25/centered_abs_mean": 0.16329463422298432,
"signal/frontier_coverage_25/group_std_mean": 0.20417420566082,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04194674119353294,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023351131472736596,
"signal/frontier_coverage_5/centered_abs_mean": 0.11565566658973694,
"signal/frontier_coverage_5/group_std_mean": 0.15015834867954253,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.029612866416573524,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001653875899501145,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3260919272899628,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39335213899612426,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5854568719863892,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0326091930270195,
"step": 990
},
{
"calibration/aurc": 0.14222156184323334,
"calibration/batch_distribution_entropy": 0.9415694556389482,
"calibration/batch_entropy_100bins": 0.9432832874756203,
"calibration/batch_entropy_10bins": 0.9415694556389482,
"calibration/batch_entropy_50bins": 0.9505710303745625,
"calibration/batch_uniqueness": 0.947582376897533,
"calibration/buffer_distribution_entropy": 0.9669185277208856,
"calibration/buffer_entropy_100bins": 0.9825735776521283,
"calibration/buffer_entropy_10bins": 0.9669185277208856,
"calibration/buffer_entropy_50bins": 0.9799605007398385,
"calibration/confidence_entropy": 0.4812596197164378,
"calibration/coverage@0%": 0.2971660139251523,
"calibration/coverage@1%": 0.33005330722367276,
"calibration/coverage@10%": 0.4866677545691907,
"calibration/coverage@15%": 0.5754011640557005,
"calibration/coverage@20%": 0.7131187445604874,
"calibration/coverage@25%": 0.8065437336814621,
"calibration/coverage@30%": 0.8456851066144473,
"calibration/coverage@5%": 0.3843274042645779,
"calibration/distribution_entropy_10": 0.9415694556389482,
"calibration/distribution_entropy_100": 0.9432832874756203,
"calibration/ece": 0.1949963851854874,
"calibration/mean_confidence": 0.5996266332272628,
"calibration/unique_confidence_per_question": 0.7765625,
"calibration/unique_confidences": 298.2,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.002951388888888862,
"completions/max_length": 4036.0,
"completions/max_terminated_length": 4036.0,
"completions/mean_length": 1861.501123046875,
"completions/mean_terminated_length": 1867.0845458984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 724.4,
"epoch": 2.3911951100611244,
"grad_norm": 0.0023452253080904484,
"learning_rate": 2.7043269230769233e-07,
"loss": -0.0073,
"num_tokens": 3020283909.0,
"reward": 1.0259827852249146,
"reward_std": 0.09166586697101593,
"rewards/accuracy_reward": 0.7354166626930236,
"rewards/brier_reward": 0.830725634098053,
"rewards/confidence_uniqueness_reward": 0.9464214205741882,
"rewards/format_reward": 0.9970486044883728,
"rewards/frontier_coverage_0": 0.018453091010451318,
"rewards/frontier_coverage_1": 0.018490078300237654,
"rewards/frontier_coverage_10": 0.08058418780565262,
"rewards/frontier_coverage_15": 0.15889862924814224,
"rewards/frontier_coverage_20": 0.26112279295921326,
"rewards/frontier_coverage_25": 0.33524413108825685,
"rewards/frontier_coverage_5": 0.021506571979261934,
"rewards/frontier_entropy_batch_reward": -0.30753050446510316,
"signal/accuracy_reward/centered_abs_mean": 0.0952799454331398,
"signal/accuracy_reward/group_std_mean": 0.12790547311306,
"signal/accuracy_reward/group_zero_std_frac": 0.6194444537162781,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.855156683921814,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0476399727165699,
"signal/advantage_abs_mean": 0.7717325329780579,
"signal/advantage_pre_scale_abs_mean": 0.0698886714875698,
"signal/advantage_pre_scale_std": 0.11719027608633041,
"signal/advantage_std": 0.9826955795288086,
"signal/brier_reward/centered_abs_mean": 0.10128604173660279,
"signal/brier_reward/group_std_mean": 0.1313249558210373,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18230506181716918,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010128603875637054,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01827959679067135,
"signal/confidence_uniqueness_reward/group_std_mean": 0.026158673316240312,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03279260098934174,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018279596930369734,
"signal/format_reward/centered_abs_mean": 0.0046115451375953855,
"signal/format_reward/group_std_mean": 0.009491527453064919,
"signal/format_reward/group_zero_std_frac": 0.9555555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.04077572412788868,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0023057725687976927,
"signal/frontier_coverage_0/centered_abs_mean": 0.1416991651058197,
"signal/frontier_coverage_0/group_std_mean": 0.18187021017074584,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036441127955913546,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020262979669496417,
"signal/frontier_coverage_1/centered_abs_mean": 0.1416353702545166,
"signal/frontier_coverage_1/group_std_mean": 0.1817883223295212,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03642522916197777,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020253857830539344,
"signal/frontier_coverage_10/centered_abs_mean": 0.06796745508909226,
"signal/frontier_coverage_10/group_std_mean": 0.08356752395629882,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017518576234579086,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000971934583503753,
"signal/frontier_coverage_15/centered_abs_mean": 0.09602248519659043,
"signal/frontier_coverage_15/group_std_mean": 0.11829137653112412,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.024734945222735404,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00137312151491642,
"signal/frontier_coverage_20/centered_abs_mean": 0.13371631503105164,
"signal/frontier_coverage_20/group_std_mean": 0.16593950092792512,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.034411372244358064,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019121433142572641,
"signal/frontier_coverage_25/centered_abs_mean": 0.16211245357990264,
"signal/frontier_coverage_25/group_std_mean": 0.2016547739505768,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.041706757992506026,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002318208012729883,
"signal/frontier_coverage_5/centered_abs_mean": 0.12877790927886962,
"signal/frontier_coverage_5/group_std_mean": 0.16575416028499604,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03314310386776924,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018415240803733468,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33054853081703184,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39594523310661317,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5939931273460388,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03305485397577286,
"step": 995
},
{
"calibration/aurc": 0.07342741644024595,
"calibration/batch_distribution_entropy": 0.9301535869855403,
"calibration/batch_entropy_100bins": 0.9386370711182925,
"calibration/batch_entropy_10bins": 0.9301535869855403,
"calibration/batch_entropy_50bins": 0.9463011690348649,
"calibration/batch_uniqueness": 0.9467597241919155,
"calibration/buffer_distribution_entropy": 0.9674407780157438,
"calibration/buffer_entropy_100bins": 0.9828415632458573,
"calibration/buffer_entropy_10bins": 0.9674407780157438,
"calibration/buffer_entropy_50bins": 0.9802421205624169,
"calibration/confidence_entropy": 0.479119943513154,
"calibration/coverage@0%": 0.16233956684165451,
"calibration/coverage@1%": 0.2358736181906536,
"calibration/coverage@10%": 0.7203064184228933,
"calibration/coverage@15%": 0.8644619795381985,
"calibration/coverage@20%": 0.9070068389246331,
"calibration/coverage@25%": 0.9457938968668408,
"calibration/coverage@30%": 0.9781222802436901,
"calibration/coverage@5%": 0.6136452662966068,
"calibration/distribution_entropy_10": 0.9301535869855403,
"calibration/distribution_entropy_100": 0.9386370711182925,
"calibration/ece": 0.19834418927249478,
"calibration/mean_confidence": 0.6189614354133265,
"calibration/unique_confidence_per_question": 0.7708333333333334,
"calibration/unique_confidences": 296.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004166666666666674,
"completions/max_length": 3995.6,
"completions/max_terminated_length": 3995.6,
"completions/mean_length": 1884.9341064453124,
"completions/mean_terminated_length": 1892.77158203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 643.8,
"epoch": 2.4031949600629994,
"grad_norm": 0.0023564172443002462,
"learning_rate": 2.403846153846154e-07,
"loss": -0.0104,
"num_tokens": 3045098126.0,
"reward": 1.050121831893921,
"reward_std": 0.09569599479436874,
"rewards/accuracy_reward": 0.7814236044883728,
"rewards/brier_reward": 0.8372802495956421,
"rewards/confidence_uniqueness_reward": 0.9458718299865723,
"rewards/format_reward": 0.9958333373069763,
"rewards/frontier_coverage_0": -0.010130425938405097,
"rewards/frontier_coverage_1": -0.010074634104967117,
"rewards/frontier_coverage_10": 0.08780712187290192,
"rewards/frontier_coverage_15": 0.17775541841983794,
"rewards/frontier_coverage_20": 0.2861971020698547,
"rewards/frontier_coverage_25": 0.3673263430595398,
"rewards/frontier_coverage_5": -0.002790766826365143,
"rewards/frontier_entropy_batch_reward": -0.2963591665029526,
"signal/accuracy_reward/centered_abs_mean": 0.09832899421453475,
"signal/accuracy_reward/group_std_mean": 0.13242024779319764,
"signal/accuracy_reward/group_zero_std_frac": 0.6111111223697663,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8578180551528931,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04916449710726738,
"signal/advantage_abs_mean": 0.770215654373169,
"signal/advantage_pre_scale_abs_mean": 0.07169848531484604,
"signal/advantage_pre_scale_std": 0.12310145199298858,
"signal/advantage_std": 0.982709777355194,
"signal/brier_reward/centered_abs_mean": 0.09941399097442627,
"signal/brier_reward/group_std_mean": 0.12858400940895082,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17628815174102783,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009941398911178113,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02001577503979206,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03095482215285301,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.036119457334280014,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002001577545888722,
"signal/format_reward/centered_abs_mean": 0.007183159794658422,
"signal/format_reward/group_std_mean": 0.015246148407459258,
"signal/format_reward/group_zero_std_frac": 0.9305555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06603498458862304,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.003591579897329211,
"signal/frontier_coverage_0/centered_abs_mean": 0.13947281688451768,
"signal/frontier_coverage_0/group_std_mean": 0.1804393172264099,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03531498908996582,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001994461310096085,
"signal/frontier_coverage_1/centered_abs_mean": 0.13939008265733718,
"signal/frontier_coverage_1/group_std_mean": 0.18033737540245057,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035294461995363235,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001993278227746487,
"signal/frontier_coverage_10/centered_abs_mean": 0.06610210686922073,
"signal/frontier_coverage_10/group_std_mean": 0.08138312101364135,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.016847145184874533,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009452601661905646,
"signal/frontier_coverage_15/centered_abs_mean": 0.09497750997543335,
"signal/frontier_coverage_15/group_std_mean": 0.11707037836313247,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.024168269336223604,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013581784442067146,
"signal/frontier_coverage_20/centered_abs_mean": 0.1326014831662178,
"signal/frontier_coverage_20/group_std_mean": 0.1643354892730713,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0336851567029953,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018962011905387043,
"signal/frontier_coverage_25/centered_abs_mean": 0.16154046058654786,
"signal/frontier_coverage_25/group_std_mean": 0.20086792409420012,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0409718930721283,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002310028485953808,
"signal/frontier_coverage_5/centered_abs_mean": 0.12495265901088715,
"signal/frontier_coverage_5/group_std_mean": 0.16214902102947235,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03164008669555187,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017868230119347573,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32641816735267637,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3914418339729309,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5837355196475983,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0326418187469244,
"step": 1000
},
{
"epoch": 2.4031949600629994,
"eval_completions/clipped_ratio": 0.004340277777777772,
"eval_completions/max_length": 3791.6666666666665,
"eval_completions/max_terminated_length": 3791.6666666666665,
"eval_completions/mean_length": 1842.2066446940105,
"eval_completions/mean_terminated_length": 1850.1665445963542,
"eval_completions/min_length": 407.1666666666667,
"eval_completions/min_terminated_length": 795.0,
"eval_loss": 0.0,
"eval_num_tokens": 3045098126.0,
"eval_reward": 0.9361613194147745,
"eval_reward_std": 0.23841848721106848,
"eval_rewards/accuracy_reward": 0.7092013955116272,
"eval_rewards/brier_reward": 0.8246038556098938,
"eval_rewards/confidence_uniqueness_reward": 0.8930894037087759,
"eval_rewards/format_reward": 0.9947916666666666,
"eval_rewards/frontier_coverage_0": 0.01996202681524058,
"eval_rewards/frontier_coverage_1": 0.020027826928223174,
"eval_rewards/frontier_coverage_10": 0.07537480567892392,
"eval_rewards/frontier_coverage_15": 0.14895516633987427,
"eval_rewards/frontier_coverage_20": 0.23826486865679422,
"eval_rewards/frontier_coverage_25": 0.30555500090122223,
"eval_rewards/frontier_coverage_5": 0.02225215562308828,
"eval_rewards/frontier_entropy_batch_reward": -0.9947916666666666,
"eval_runtime": 220.4409,
"eval_samples_per_second": 4.536,
"eval_signal/accuracy_reward/centered_abs_mean": 0.3987087657054265,
"eval_signal/accuracy_reward/group_std_mean": 0.4515012751022975,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8477271099885305,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.19935438285271326,
"eval_signal/advantage_abs_mean": 0.856895645459493,
"eval_signal/advantage_pre_scale_abs_mean": 0.20530925691127777,
"eval_signal/advantage_pre_scale_std": 0.23668034126361212,
"eval_signal/advantage_std": 0.9864044487476349,
"eval_signal/brier_reward/centered_abs_mean": 0.16098289688428244,
"eval_signal/brier_reward/group_std_mean": 0.21572668353716531,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06839290571709473,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.016098289905736845,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.045053947096069656,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06295228935778141,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01909668557345867,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004505394880349438,
"eval_signal/format_reward/centered_abs_mean": 0.009982638681928316,
"eval_signal/format_reward/group_std_mean": 0.026473373795549076,
"eval_signal/format_reward/group_zero_std_frac": 0.8611111342906952,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.02075567903618018,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.004991319340964158,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.2538191005587578,
"eval_signal/frontier_coverage_0/group_std_mean": 0.3522955924272537,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015475187761088213,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036296132020652294,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.2536289890607198,
"eval_signal/frontier_coverage_1/group_std_mean": 0.35204460720221203,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.015463725663721561,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003626894555054605,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.08963375041882198,
"eval_signal/frontier_coverage_10/group_std_mean": 0.11314565564195316,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005451245854298274,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012817625926497083,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.17246426890293756,
"eval_signal/frontier_coverage_15/group_std_mean": 0.21190873285134634,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.010484680533409119,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002466239112739762,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.2715127418438594,
"eval_signal/frontier_coverage_20/group_std_mean": 0.33038956423600513,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0165048170213898,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003882632163974146,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.345558096965154,
"eval_signal/frontier_coverage_25/group_std_mean": 0.4196178962786992,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.021006828794876736,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004941480699926615,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.22168447573979697,
"eval_signal/frontier_coverage_5/group_std_mean": 0.31111370027065277,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.013517022288093964,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003170088049955666,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.009982638681928316,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.026473373795549076,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8611111342906952,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.004151135838280122,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0009982639458030462,
"eval_steps_per_second": 0.027,
"step": 1000
},
{
"epoch": 2.4031949600629994,
"step": 1000,
"train_probe_completions/clipped_ratio": 0.00434027777777779,
"train_probe_completions/max_length": 3769.3333333333335,
"train_probe_completions/max_terminated_length": 3769.3333333333335,
"train_probe_completions/mean_length": 1853.1814575195312,
"train_probe_completions/mean_terminated_length": 1861.4977620442708,
"train_probe_completions/min_length": 473.6666666666667,
"train_probe_completions/min_terminated_length": 669.0,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 3045098126.0,
"train_probe_reward": 0.969299187262853,
"train_probe_reward_std": 0.21884569774071375,
"train_probe_rewards/accuracy_reward": 0.7690972288449606,
"train_probe_rewards/brier_reward": 0.8417787551879883,
"train_probe_rewards/confidence_uniqueness_reward": 0.8890791237354279,
"train_probe_rewards/format_reward": 0.9956597288449606,
"train_probe_rewards/frontier_coverage_0": -0.00047506617071727913,
"train_probe_rewards/frontier_coverage_1": -0.00046507261383036774,
"train_probe_rewards/frontier_coverage_10": 0.09062495206793149,
"train_probe_rewards/frontier_coverage_15": 0.1806212936838468,
"train_probe_rewards/frontier_coverage_20": 0.2890334626038869,
"train_probe_rewards/frontier_coverage_25": 0.3692951550086339,
"train_probe_rewards/frontier_coverage_5": 0.008488837241505584,
"train_probe_rewards/frontier_entropy_batch_reward": -0.9956597288449606,
"train_probe_runtime": 220.0235,
"train_probe_samples_per_second": 4.545,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.3460286458333333,
"train_probe_signal/accuracy_reward/group_std_mean": 0.4203969786564509,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8049575587113699,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.17301432291666666,
"train_probe_signal/advantage_abs_mean": 0.8016128440697988,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.17688036213318506,
"train_probe_signal/advantage_pre_scale_std": 0.2180818368991216,
"train_probe_signal/advantage_std": 0.9863627056280772,
"train_probe_signal/brier_reward/centered_abs_mean": 0.14901412775119147,
"train_probe_signal/brier_reward/group_std_mean": 0.20425448566675186,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06965736175576846,
"train_probe_signal/brier_reward/weight": 0.10000000149011612,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.014901412961383661,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04799235612154007,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.06400722078979015,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022248809846738975,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004799235767374436,
"train_probe_signal/format_reward/centered_abs_mean": 0.008083767102410397,
"train_probe_signal/format_reward/group_std_mean": 0.018047164815167587,
"train_probe_signal/format_reward/group_zero_std_frac": 0.9166666766007742,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.017749311091999214,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.004041883551205198,
"train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.23642443617184958,
"train_probe_signal/frontier_coverage_0/group_std_mean": 0.3478074073791504,
"train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015785963740199804,
"train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0033808692436044416,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.23625963926315308,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.34759581089019775,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01577526455124219,
"train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003378512842270235,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.09172458325823148,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.11395466451843579,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006129148804272215,
"train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001311661481546859,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.170408862332503,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.2058931663632393,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011379176595558723,
"train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024368467663104334,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.26180795580148697,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.31553854048252106,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017479725182056427,
"train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003743853730460008,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.3305613547563553,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.3989069660504659,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.022070841242869694,
"train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004727027301366131,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.20856821288665137,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.3089133898417155,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.013924311380833387,
"train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002982525465389093,
"train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.008083767102410397,
"train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.018047164815167587,
"train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9166666766007742,
"train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0035498624201864004,
"train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0008083767024800181,
"train_probe_steps_per_second": 0.027
},
{
"calibration/aurc": 0.049643029974327034,
"calibration/batch_distribution_entropy": 0.9358887317593924,
"calibration/batch_entropy_100bins": 0.9399941518547305,
"calibration/batch_entropy_10bins": 0.9358887317593924,
"calibration/batch_entropy_50bins": 0.9483285575618018,
"calibration/batch_uniqueness": 0.9475488333796699,
"calibration/buffer_distribution_entropy": 0.9649044556260031,
"calibration/buffer_entropy_100bins": 0.9816726170018539,
"calibration/buffer_entropy_10bins": 0.9649044556260031,
"calibration/buffer_entropy_50bins": 0.9788240110362392,
"calibration/confidence_entropy": 0.47691834991219684,
"calibration/coverage@0%": 0.14653612571448732,
"calibration/coverage@1%": 0.16585727453955262,
"calibration/coverage@10%": 0.8903184908145743,
"calibration/coverage@15%": 0.948948814186249,
"calibration/coverage@20%": 0.9921875,
"calibration/coverage@25%": 1.0,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.6731427813257123,
"calibration/distribution_entropy_10": 0.9358887317593924,
"calibration/distribution_entropy_100": 0.9399941518547305,
"calibration/ece": 0.2326867390651095,
"calibration/mean_confidence": 0.6246838768994191,
"calibration/unique_confidence_per_question": 0.7864583333333333,
"calibration/unique_confidences": 302.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00538194444444442,
"completions/max_length": 4071.8,
"completions/max_terminated_length": 4071.8,
"completions/mean_length": 1868.3620849609374,
"completions/mean_terminated_length": 1878.50234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 730.4,
"epoch": 2.4151948100648744,
"grad_norm": 0.0023259855806827545,
"learning_rate": 2.103365384615385e-07,
"loss": -0.0129,
"num_tokens": 3069731321.0,
"reward": 1.052335834503174,
"reward_std": 0.09243645370006562,
"rewards/accuracy_reward": 0.7889756917953491,
"rewards/brier_reward": 0.8465770006179809,
"rewards/confidence_uniqueness_reward": 0.9425639510154724,
"rewards/format_reward": 0.9947048664093018,
"rewards/frontier_coverage_0": -0.0034056782722473146,
"rewards/frontier_coverage_1": -0.003370976075530052,
"rewards/frontier_coverage_10": 0.09778977483510971,
"rewards/frontier_coverage_15": 0.19524939060211183,
"rewards/frontier_coverage_20": 0.31109591722488406,
"rewards/frontier_coverage_25": 0.390661233663559,
"rewards/frontier_coverage_5": 0.003890213742852211,
"rewards/frontier_entropy_batch_reward": -0.3260291278362274,
"signal/accuracy_reward/centered_abs_mean": 0.1012424036860466,
"signal/accuracy_reward/group_std_mean": 0.13545745313167573,
"signal/accuracy_reward/group_zero_std_frac": 0.6027777791023254,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9566566586494446,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0506212018430233,
"signal/advantage_abs_mean": 0.7642920255661011,
"signal/advantage_pre_scale_abs_mean": 0.06975356489419937,
"signal/advantage_pre_scale_std": 0.12066361606121064,
"signal/advantage_std": 0.9825953960418701,
"signal/brier_reward/centered_abs_mean": 0.10595771223306656,
"signal/brier_reward/group_std_mean": 0.13505614250898362,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20111228227615358,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010595771297812463,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021636403724551202,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03017391674220562,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04097634702920914,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021636404329910875,
"signal/format_reward/centered_abs_mean": 0.008251953125,
"signal/format_reward/group_std_mean": 0.013600048422813416,
"signal/format_reward/group_zero_std_frac": 0.95,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07726817056536675,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0041259765625,
"signal/frontier_coverage_0/centered_abs_mean": 0.15206801891326904,
"signal/frontier_coverage_0/group_std_mean": 0.19249917864799498,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04130175411701202,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002174572693184018,
"signal/frontier_coverage_1/centered_abs_mean": 0.15204941034317015,
"signal/frontier_coverage_1/group_std_mean": 0.19247425198554993,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04129683375358582,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021743066143244507,
"signal/frontier_coverage_10/centered_abs_mean": 0.07153294831514359,
"signal/frontier_coverage_10/group_std_mean": 0.08741232901811599,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.019443374872207642,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010229211766272783,
"signal/frontier_coverage_15/centered_abs_mean": 0.10049240291118622,
"signal/frontier_coverage_15/group_std_mean": 0.12352342754602433,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.027307916432619095,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014370413497090339,
"signal/frontier_coverage_20/centered_abs_mean": 0.13657326996326447,
"signal/frontier_coverage_20/group_std_mean": 0.1695472329854965,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03709420412778854,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019529977347701789,
"signal/frontier_coverage_25/centered_abs_mean": 0.16216370463371277,
"signal/frontier_coverage_25/group_std_mean": 0.2022677779197693,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04404643550515175,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023189409635961055,
"signal/frontier_coverage_5/centered_abs_mean": 0.12500394135713577,
"signal/frontier_coverage_5/group_std_mean": 0.15981516540050505,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03390644751489162,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017875563353300095,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3108845889568329,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3744558930397034,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5912355601787567,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03108845800161362,
"step": 1005
},
{
"calibration/aurc": 0.06281418079147369,
"calibration/batch_distribution_entropy": 0.9339591481381666,
"calibration/batch_entropy_100bins": 0.9388637498613687,
"calibration/batch_entropy_10bins": 0.9339591481381666,
"calibration/batch_entropy_50bins": 0.9467069144908493,
"calibration/batch_uniqueness": 0.946334471147605,
"calibration/buffer_distribution_entropy": 0.9648264661719301,
"calibration/buffer_entropy_100bins": 0.9816649220461823,
"calibration/buffer_entropy_10bins": 0.9648264661719301,
"calibration/buffer_entropy_50bins": 0.9787664691781185,
"calibration/confidence_entropy": 0.4745061170928304,
"calibration/coverage@0%": 0.12934893317023363,
"calibration/coverage@1%": 0.255000314806439,
"calibration/coverage@10%": 0.7184015285458769,
"calibration/coverage@15%": 0.9169174303399737,
"calibration/coverage@20%": 0.9853606975082749,
"calibration/coverage@25%": 1.0,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.5823624231615304,
"calibration/distribution_entropy_10": 0.9339591481381666,
"calibration/distribution_entropy_100": 0.9388637498613687,
"calibration/ece": 0.18922213956911352,
"calibration/mean_confidence": 0.6355573979566707,
"calibration/unique_confidence_per_question": 0.7739583333333333,
"calibration/unique_confidences": 297.2,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004166666666666674,
"completions/max_length": 4015.2,
"completions/max_terminated_length": 4015.2,
"completions/mean_length": 1842.25009765625,
"completions/mean_terminated_length": 1850.0492431640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 612.0,
"epoch": 2.4271946600667493,
"grad_norm": 0.002243275521323085,
"learning_rate": 1.8028846153846153e-07,
"loss": -0.0105,
"num_tokens": 3094040186.0,
"reward": 1.0495539903640747,
"reward_std": 0.09262419492006302,
"rewards/accuracy_reward": 0.7880208373069764,
"rewards/brier_reward": 0.8300612449645997,
"rewards/confidence_uniqueness_reward": 0.9446048140525818,
"rewards/format_reward": 0.9958333253860474,
"rewards/frontier_coverage_0": -0.02462125839665532,
"rewards/frontier_coverage_1": -0.02462125839665532,
"rewards/frontier_coverage_10": 0.09222666025161744,
"rewards/frontier_coverage_15": 0.18372822403907776,
"rewards/frontier_coverage_20": 0.2927555561065674,
"rewards/frontier_coverage_25": 0.365255606174469,
"rewards/frontier_coverage_5": -0.008586358372122049,
"rewards/frontier_entropy_batch_reward": -0.3236843585968018,
"signal/accuracy_reward/centered_abs_mean": 0.08917100727558136,
"signal/accuracy_reward/group_std_mean": 0.12387789636850358,
"signal/accuracy_reward/group_zero_std_frac": 0.6277777671813964,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7768993377685547,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04458550363779068,
"signal/advantage_abs_mean": 0.7722596883773803,
"signal/advantage_pre_scale_abs_mean": 0.06895973756909371,
"signal/advantage_pre_scale_std": 0.11722018867731095,
"signal/advantage_std": 0.9827241063117981,
"signal/brier_reward/centered_abs_mean": 0.10290507674217224,
"signal/brier_reward/group_std_mean": 0.1320227026939392,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18089546859264374,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010290507972240449,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02060473933815956,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030566220358014106,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03596749491989613,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020604739896953107,
"signal/format_reward/centered_abs_mean": 0.0071289062383584675,
"signal/format_reward/group_std_mean": 0.013987554050982,
"signal/format_reward/group_zero_std_frac": 0.9388888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06039946414530277,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0035644531191792337,
"signal/frontier_coverage_0/centered_abs_mean": 0.13577041774988174,
"signal/frontier_coverage_0/group_std_mean": 0.17625984847545623,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0340516809374094,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019415169022977351,
"signal/frontier_coverage_1/centered_abs_mean": 0.13577041774988174,
"signal/frontier_coverage_1/group_std_mean": 0.17625984847545623,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0340516809374094,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019415169022977351,
"signal/frontier_coverage_10/centered_abs_mean": 0.07094871997833252,
"signal/frontier_coverage_10/group_std_mean": 0.08687936514616013,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017919499427080154,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010145666543394326,
"signal/frontier_coverage_15/centered_abs_mean": 0.10347563177347183,
"signal/frontier_coverage_15/group_std_mean": 0.12728520035743712,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026157256960868836,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014797014882788062,
"signal/frontier_coverage_20/centered_abs_mean": 0.14272991716861724,
"signal/frontier_coverage_20/group_std_mean": 0.17653460204601287,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03607037365436554,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020410379394888876,
"signal/frontier_coverage_25/centered_abs_mean": 0.16851746439933776,
"signal/frontier_coverage_25/group_std_mean": 0.209286230802536,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.042574727535247804,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024097997695207594,
"signal/frontier_coverage_5/centered_abs_mean": 0.10969754308462143,
"signal/frontier_coverage_5/group_std_mean": 0.14257647544145585,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02749013453722,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015686748549342156,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3435045719146729,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40446537733078003,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6084434032440186,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03435045927762985,
"step": 1010
},
{
"calibration/aurc": 0.09016903645517856,
"calibration/batch_distribution_entropy": 0.9466750867360043,
"calibration/batch_entropy_100bins": 0.9441836862649339,
"calibration/batch_entropy_10bins": 0.9466750867360043,
"calibration/batch_entropy_50bins": 0.9534713268522423,
"calibration/batch_uniqueness": 0.9486876738949407,
"calibration/buffer_distribution_entropy": 0.9640184617127516,
"calibration/buffer_entropy_100bins": 0.9813054812950373,
"calibration/buffer_entropy_10bins": 0.9640184617127516,
"calibration/buffer_entropy_50bins": 0.9783259839610075,
"calibration/confidence_entropy": 0.489239218051625,
"calibration/coverage@0%": 0.05799001305483028,
"calibration/coverage@1%": 0.2185127502175805,
"calibration/coverage@10%": 0.5503759355961706,
"calibration/coverage@15%": 0.9104431278735434,
"calibration/coverage@20%": 0.9597774527360464,
"calibration/coverage@25%": 0.9775456919060052,
"calibration/coverage@30%": 0.9968668407310706,
"calibration/coverage@5%": 0.46811294604003484,
"calibration/distribution_entropy_10": 0.9466750867360043,
"calibration/distribution_entropy_100": 0.9441836862649339,
"calibration/ece": 0.22282431667164934,
"calibration/mean_confidence": 0.6071933231554596,
"calibration/unique_confidence_per_question": 0.7802083333333333,
"calibration/unique_confidences": 299.6,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009548611111111095,
"completions/max_length": 4002.0,
"completions/max_terminated_length": 4002.0,
"completions/mean_length": 1828.344921875,
"completions/mean_terminated_length": 1846.11103515625,
"completions/min_length": 123.0,
"completions/min_terminated_length": 656.0,
"epoch": 2.4391945100686243,
"grad_norm": 0.002343389904126525,
"learning_rate": 1.5024038461538462e-07,
"loss": -0.0218,
"num_tokens": 3118195487.0,
"reward": 1.0343999147415162,
"reward_std": 0.101786407828331,
"rewards/accuracy_reward": 0.7589409828186036,
"rewards/brier_reward": 0.8267424464225769,
"rewards/confidence_uniqueness_reward": 0.940627145767212,
"rewards/format_reward": 0.9904513955116272,
"rewards/frontier_coverage_0": -0.005277461744844914,
"rewards/frontier_coverage_1": -0.005277461744844914,
"rewards/frontier_coverage_10": 0.08828879594802856,
"rewards/frontier_coverage_15": 0.17405935227870942,
"rewards/frontier_coverage_20": 0.27904576659202573,
"rewards/frontier_coverage_25": 0.34634585976600646,
"rewards/frontier_coverage_5": 0.0002036154270172119,
"rewards/frontier_entropy_batch_reward": -0.2957990825176239,
"signal/accuracy_reward/centered_abs_mean": 0.09822591096162796,
"signal/accuracy_reward/group_std_mean": 0.13754905611276627,
"signal/accuracy_reward/group_zero_std_frac": 0.5722222268581391,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.821112871170044,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04911295548081398,
"signal/advantage_abs_mean": 0.7582108736038208,
"signal/advantage_pre_scale_abs_mean": 0.07610100358724595,
"signal/advantage_pre_scale_std": 0.13187426030635835,
"signal/advantage_std": 0.9827902913093567,
"signal/brier_reward/centered_abs_mean": 0.10396721214056015,
"signal/brier_reward/group_std_mean": 0.1354757845401764,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17589681446552277,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010396721586585046,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025327697582542896,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03629572652280331,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.042280444875359535,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002532769786193967,
"signal/format_reward/centered_abs_mean": 0.01250000037252903,
"signal/format_reward/group_std_mean": 0.020553291589021683,
"signal/format_reward/group_zero_std_frac": 0.925000011920929,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10179329812526702,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006250000186264515,
"signal/frontier_coverage_0/centered_abs_mean": 0.1361816868185997,
"signal/frontier_coverage_0/group_std_mean": 0.18098072111606597,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03291482552886009,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019473981345072388,
"signal/frontier_coverage_1/centered_abs_mean": 0.1361816868185997,
"signal/frontier_coverage_1/group_std_mean": 0.18098072111606597,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03291482552886009,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019473981345072388,
"signal/frontier_coverage_10/centered_abs_mean": 0.06957028657197953,
"signal/frontier_coverage_10/group_std_mean": 0.08603480309247971,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.016844875738024713,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000994855083990842,
"signal/frontier_coverage_15/centered_abs_mean": 0.10170601159334183,
"signal/frontier_coverage_15/group_std_mean": 0.125798237323761,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02465054877102375,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014543959870934487,
"signal/frontier_coverage_20/centered_abs_mean": 0.14266515970230104,
"signal/frontier_coverage_20/group_std_mean": 0.17721356749534606,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.034593602642416954,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002040111809037626,
"signal/frontier_coverage_25/centered_abs_mean": 0.16853521764278412,
"signal/frontier_coverage_25/group_std_mean": 0.20990538001060485,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04088501185178757,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002410053554922342,
"signal/frontier_coverage_5/centered_abs_mean": 0.11141620129346848,
"signal/frontier_coverage_5/group_std_mean": 0.14961472749710084,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02694331631064415,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015932516660541296,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.326166045665741,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3929603099822998,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5524879813194274,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03261660411953926,
"step": 1015
},
{
"calibration/aurc": 0.07510081039220799,
"calibration/batch_distribution_entropy": 0.9426630485683243,
"calibration/batch_entropy_100bins": 0.9435449820138105,
"calibration/batch_entropy_10bins": 0.9426630485683243,
"calibration/batch_entropy_50bins": 0.95117591907094,
"calibration/batch_uniqueness": 0.9488083222883464,
"calibration/buffer_distribution_entropy": 0.9647369329369576,
"calibration/buffer_entropy_100bins": 0.9816932900847481,
"calibration/buffer_entropy_10bins": 0.9647369329369576,
"calibration/buffer_entropy_50bins": 0.9787712595165315,
"calibration/confidence_entropy": 0.4916390496712138,
"calibration/coverage@0%": 0.28742112536503966,
"calibration/coverage@1%": 0.3936729031554595,
"calibration/coverage@10%": 0.5427528966131907,
"calibration/coverage@15%": 0.9198028074866311,
"calibration/coverage@20%": 0.9691510695187165,
"calibration/coverage@25%": 0.9930481283422459,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.4725573752228164,
"calibration/distribution_entropy_10": 0.9426630485683243,
"calibration/distribution_entropy_100": 0.9435449820138105,
"calibration/ece": 0.23388155342734285,
"calibration/mean_confidence": 0.6157962873395249,
"calibration/unique_confidence_per_question": 0.7807291666666666,
"calibration/unique_confidences": 299.8,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008506944444444465,
"completions/max_length": 4062.4,
"completions/max_terminated_length": 4062.4,
"completions/mean_length": 1823.9856689453125,
"completions/mean_terminated_length": 1839.737255859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 643.0,
"epoch": 2.4511943600704993,
"grad_norm": 0.0023279606830328703,
"learning_rate": 1.201923076923077e-07,
"loss": -0.0238,
"num_tokens": 3142309274.0,
"reward": 1.0500015020370483,
"reward_std": 0.10346215963363647,
"rewards/accuracy_reward": 0.7928819537162781,
"rewards/brier_reward": 0.821394145488739,
"rewards/confidence_uniqueness_reward": 0.941378140449524,
"rewards/format_reward": 0.9916666746139526,
"rewards/frontier_coverage_0": -0.03479338986799121,
"rewards/frontier_coverage_1": -0.03479338986799121,
"rewards/frontier_coverage_10": 0.08953844606876374,
"rewards/frontier_coverage_15": 0.18047432899475097,
"rewards/frontier_coverage_20": 0.29160410165786743,
"rewards/frontier_coverage_25": 0.36005922555923464,
"rewards/frontier_coverage_5": -0.021287964098155497,
"rewards/frontier_entropy_batch_reward": -0.3043049812316895,
"signal/accuracy_reward/centered_abs_mean": 0.11208767294883729,
"signal/accuracy_reward/group_std_mean": 0.14802851974964143,
"signal/accuracy_reward/group_zero_std_frac": 0.5722222328186035,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9433816790580749,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05604383647441864,
"signal/advantage_abs_mean": 0.7772905588150024,
"signal/advantage_pre_scale_abs_mean": 0.07995961681008339,
"signal/advantage_pre_scale_std": 0.1342957466840744,
"signal/advantage_std": 0.9827840328216553,
"signal/brier_reward/centered_abs_mean": 0.11277692764997482,
"signal/brier_reward/group_std_mean": 0.14259937554597854,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19096179604530333,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011277692764997483,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025512998178601264,
"signal/confidence_uniqueness_reward/group_std_mean": 0.035593613237142566,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.043344457447528836,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002551299869082868,
"signal/format_reward/centered_abs_mean": 0.012912326585501432,
"signal/format_reward/group_std_mean": 0.02003680355846882,
"signal/format_reward/group_zero_std_frac": 0.9305555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10890447869896888,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006456163292750716,
"signal/frontier_coverage_0/centered_abs_mean": 0.1563192069530487,
"signal/frontier_coverage_0/group_std_mean": 0.1986013501882553,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03789141923189163,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002235364611260593,
"signal/frontier_coverage_1/centered_abs_mean": 0.1563192069530487,
"signal/frontier_coverage_1/group_std_mean": 0.1986013501882553,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03789141923189163,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002235364611260593,
"signal/frontier_coverage_10/centered_abs_mean": 0.07052088975906372,
"signal/frontier_coverage_10/group_std_mean": 0.08744210004806519,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017230145074427126,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001008448691572994,
"signal/frontier_coverage_15/centered_abs_mean": 0.10222131013870239,
"signal/frontier_coverage_15/group_std_mean": 0.12673643082380295,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02497452460229397,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014617647510021925,
"signal/frontier_coverage_20/centered_abs_mean": 0.14299859404563903,
"signal/frontier_coverage_20/group_std_mean": 0.1779682904481888,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.034896204993128774,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00204487987793982,
"signal/frontier_coverage_25/centered_abs_mean": 0.16885097622871398,
"signal/frontier_coverage_25/group_std_mean": 0.210318660736084,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04115983694791794,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002414568979293108,
"signal/frontier_coverage_5/centered_abs_mean": 0.1253449410200119,
"signal/frontier_coverage_5/group_std_mean": 0.16036904454231263,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03035188913345337,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00179243260063231,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3257643938064575,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3911713778972626,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5545288920402527,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0325764387845993,
"step": 1020
},
{
"calibration/aurc": 0.10795433323355885,
"calibration/batch_distribution_entropy": 0.954471539417737,
"calibration/batch_entropy_100bins": 0.9521645413868827,
"calibration/batch_entropy_10bins": 0.954471539417737,
"calibration/batch_entropy_50bins": 0.9614803987982079,
"calibration/batch_uniqueness": 0.9506998628756316,
"calibration/buffer_distribution_entropy": 0.9642103525562659,
"calibration/buffer_entropy_100bins": 0.9814568581448638,
"calibration/buffer_entropy_10bins": 0.9642103525562659,
"calibration/buffer_entropy_50bins": 0.9784806514041853,
"calibration/confidence_entropy": 0.4912213062999834,
"calibration/coverage@0%": 0.1714014889154245,
"calibration/coverage@1%": 0.177656942667606,
"calibration/coverage@10%": 0.5418318083204239,
"calibration/coverage@15%": 0.5912203710796203,
"calibration/coverage@20%": 0.9185154155495979,
"calibration/coverage@25%": 0.9672922252010723,
"calibration/coverage@30%": 0.979088471849866,
"calibration/coverage@5%": 0.3355723281353475,
"calibration/distribution_entropy_10": 0.954471539417737,
"calibration/distribution_entropy_100": 0.9521645413868827,
"calibration/ece": 0.2106986534151861,
"calibration/mean_confidence": 0.5992449490097288,
"calibration/unique_confidence_per_question": 0.7973958333333333,
"calibration/unique_confidences": 306.2,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004947916666666674,
"completions/max_length": 4053.6,
"completions/max_terminated_length": 4053.6,
"completions/mean_length": 1831.3671875,
"completions/mean_terminated_length": 1840.537353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 570.8,
"epoch": 2.4631942100723743,
"grad_norm": 0.0021897871047258377,
"learning_rate": 9.014423076923076e-08,
"loss": -0.0114,
"num_tokens": 3166493728.0,
"reward": 1.0358627796173097,
"reward_std": 0.0949734017252922,
"rewards/accuracy_reward": 0.7576388955116272,
"rewards/brier_reward": 0.8228162169456482,
"rewards/confidence_uniqueness_reward": 0.9453029632568359,
"rewards/format_reward": 0.9950520873069764,
"rewards/frontier_coverage_0": -0.010437600314617157,
"rewards/frontier_coverage_1": -0.010437600314617157,
"rewards/frontier_coverage_10": 0.08798636645078659,
"rewards/frontier_coverage_15": 0.17348833680152892,
"rewards/frontier_coverage_20": 0.2779244124889374,
"rewards/frontier_coverage_25": 0.33984118700027466,
"rewards/frontier_coverage_5": -0.0003456904669292271,
"rewards/frontier_entropy_batch_reward": -0.29564343094825746,
"signal/accuracy_reward/centered_abs_mean": 0.09862196296453477,
"signal/accuracy_reward/group_std_mean": 0.13283505588769912,
"signal/accuracy_reward/group_zero_std_frac": 0.6138888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8716125965118409,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04931098148226738,
"signal/advantage_abs_mean": 0.768799102306366,
"signal/advantage_pre_scale_abs_mean": 0.07138014510273934,
"signal/advantage_pre_scale_std": 0.12297854572534561,
"signal/advantage_std": 0.982682466506958,
"signal/brier_reward/centered_abs_mean": 0.10478228777647018,
"signal/brier_reward/group_std_mean": 0.1331074982881546,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1881021410226822,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010478229075670243,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02070125788450241,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030240644142031668,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03780189417302608,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020701258908957243,
"signal/format_reward/centered_abs_mean": 0.00837131068110466,
"signal/format_reward/group_std_mean": 0.014943964406847953,
"signal/format_reward/group_zero_std_frac": 0.9416666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07720507308840752,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00418565534055233,
"signal/frontier_coverage_0/centered_abs_mean": 0.14610558599233628,
"signal/frontier_coverage_0/group_std_mean": 0.18713459372520447,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03730213642120361,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020893098786473273,
"signal/frontier_coverage_1/centered_abs_mean": 0.14610558599233628,
"signal/frontier_coverage_1/group_std_mean": 0.18713459372520447,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03730213642120361,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020893098786473273,
"signal/frontier_coverage_10/centered_abs_mean": 0.06919336915016175,
"signal/frontier_coverage_10/group_std_mean": 0.0849489226937294,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017900803312659263,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009894651593640447,
"signal/frontier_coverage_15/centered_abs_mean": 0.09901081472635269,
"signal/frontier_coverage_15/group_std_mean": 0.12232684940099717,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02565161548554897,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014158546226099133,
"signal/frontier_coverage_20/centered_abs_mean": 0.13749481439590455,
"signal/frontier_coverage_20/group_std_mean": 0.17131005227565765,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03556998260319233,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001966175786219537,
"signal/frontier_coverage_25/centered_abs_mean": 0.16092342138290405,
"signal/frontier_coverage_25/group_std_mean": 0.2009727656841278,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04158979952335358,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002301204949617386,
"signal/frontier_coverage_5/centered_abs_mean": 0.1154853418469429,
"signal/frontier_coverage_5/group_std_mean": 0.14849618971347808,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.029463668912649156,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016514403512701392,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31661018133163454,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38219080567359925,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5740918219089508,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03166101947426796,
"step": 1025
},
{
"calibration/aurc": 0.12602570302866045,
"calibration/batch_distribution_entropy": 0.9447044630714128,
"calibration/batch_entropy_100bins": 0.9450190257070968,
"calibration/batch_entropy_10bins": 0.9447044630714128,
"calibration/batch_entropy_50bins": 0.9548580318251819,
"calibration/batch_uniqueness": 0.9488180854166666,
"calibration/buffer_distribution_entropy": 0.9654555801025856,
"calibration/buffer_entropy_100bins": 0.982137694828989,
"calibration/buffer_entropy_10bins": 0.9654555801025856,
"calibration/buffer_entropy_50bins": 0.9792643436738526,
"calibration/confidence_entropy": 0.47917161620424187,
"calibration/coverage@0%": 0.035741666666666665,
"calibration/coverage@1%": 0.13920833333333332,
"calibration/coverage@10%": 0.6481708333333333,
"calibration/coverage@15%": 0.7228749999999999,
"calibration/coverage@20%": 0.8481375,
"calibration/coverage@25%": 0.9088416666666668,
"calibration/coverage@30%": 0.9526041666666666,
"calibration/coverage@5%": 0.29895,
"calibration/distribution_entropy_10": 0.9447044630714128,
"calibration/distribution_entropy_100": 0.9450190257070968,
"calibration/ece": 0.1854570408333333,
"calibration/mean_confidence": 0.5931998533333334,
"calibration/unique_confidence_per_question": 0.7885416666666667,
"calibration/unique_confidences": 302.8,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.002517361111111116,
"completions/max_length": 4025.0,
"completions/max_terminated_length": 4025.0,
"completions/mean_length": 1796.7112060546874,
"completions/mean_terminated_length": 1801.3041748046876,
"completions/min_length": 0.0,
"completions/min_terminated_length": 648.6,
"epoch": 2.4751940600742492,
"grad_norm": 0.0023873134050518274,
"learning_rate": 6.009615384615386e-08,
"loss": -0.0018,
"num_tokens": 3190264289.0,
"reward": 1.0420259952545166,
"reward_std": 0.0918187752366066,
"rewards/accuracy_reward": 0.7673611164093017,
"rewards/brier_reward": 0.8428851127624511,
"rewards/confidence_uniqueness_reward": 0.9457682132720947,
"rewards/format_reward": 0.9973958253860473,
"rewards/frontier_coverage_0": 0.005533659388311208,
"rewards/frontier_coverage_1": 0.005533659388311208,
"rewards/frontier_coverage_10": 0.09484143853187561,
"rewards/frontier_coverage_15": 0.18530669808387756,
"rewards/frontier_coverage_20": 0.29715303182601926,
"rewards/frontier_coverage_25": 0.36473381519317627,
"rewards/frontier_coverage_5": 0.018825782649219035,
"rewards/frontier_entropy_batch_reward": -0.3311637341976166,
"signal/accuracy_reward/centered_abs_mean": 0.09390190988779068,
"signal/accuracy_reward/group_std_mean": 0.12828511744737625,
"signal/accuracy_reward/group_zero_std_frac": 0.6111111044883728,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8142975091934204,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04695095494389534,
"signal/advantage_abs_mean": 0.7726901888847351,
"signal/advantage_pre_scale_abs_mean": 0.07007159218192101,
"signal/advantage_pre_scale_std": 0.11733129620552063,
"signal/advantage_std": 0.9827264785766602,
"signal/brier_reward/centered_abs_mean": 0.10260952860116959,
"signal/brier_reward/group_std_mean": 0.13155785202980042,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17991337776184083,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01026095375418663,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01803735364228487,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02397899702191353,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.032087193056941035,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018037353875115514,
"signal/format_reward/centered_abs_mean": 0.004112413222901523,
"signal/format_reward/group_std_mean": 0.006730147963389754,
"signal/format_reward/group_zero_std_frac": 0.975,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.036464104615151885,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0020562066114507616,
"signal/frontier_coverage_0/centered_abs_mean": 0.1377152234315872,
"signal/frontier_coverage_0/group_std_mean": 0.17628149390220643,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03444495052099228,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019693276146426795,
"signal/frontier_coverage_1/centered_abs_mean": 0.1377152234315872,
"signal/frontier_coverage_1/group_std_mean": 0.17628149390220643,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03444495052099228,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019693276146426795,
"signal/frontier_coverage_10/centered_abs_mean": 0.07171624302864074,
"signal/frontier_coverage_10/group_std_mean": 0.08833199888467788,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.018152038007974623,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010255422326736151,
"signal/frontier_coverage_15/centered_abs_mean": 0.10491779446601868,
"signal/frontier_coverage_15/group_std_mean": 0.1298350602388382,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026540745049715042,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015003244625404477,
"signal/frontier_coverage_20/centered_abs_mean": 0.1449584811925888,
"signal/frontier_coverage_20/group_std_mean": 0.18116262555122375,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03661221191287041,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020729063078761103,
"signal/frontier_coverage_25/centered_abs_mean": 0.16970057189464569,
"signal/frontier_coverage_25/group_std_mean": 0.21266899406909942,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04281158521771431,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024267181288450956,
"signal/frontier_coverage_5/centered_abs_mean": 0.10611386597156525,
"signal/frontier_coverage_5/group_std_mean": 0.13786848783493041,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.026468663662672042,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015174282249063253,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3376332998275757,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3988232672214508,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5964804947376251,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03376332968473435,
"step": 1030
},
{
"calibration/aurc": 0.07552295319038503,
"calibration/batch_distribution_entropy": 0.9133538504038856,
"calibration/batch_entropy_100bins": 0.9286687311084607,
"calibration/batch_entropy_10bins": 0.9133538504038856,
"calibration/batch_entropy_50bins": 0.9350509932222222,
"calibration/batch_uniqueness": 0.9436407235880271,
"calibration/buffer_distribution_entropy": 0.9652011082369188,
"calibration/buffer_entropy_100bins": 0.9820486659725232,
"calibration/buffer_entropy_10bins": 0.9652011082369188,
"calibration/buffer_entropy_50bins": 0.9791281607291051,
"calibration/confidence_entropy": 0.4885732382130474,
"calibration/coverage@0%": 0.05645285087719297,
"calibration/coverage@1%": 0.187702850877193,
"calibration/coverage@10%": 0.6769682017543859,
"calibration/coverage@15%": 0.9291940789473685,
"calibration/coverage@20%": 0.9579934210526316,
"calibration/coverage@25%": 0.9836842105263159,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.558108552631579,
"calibration/distribution_entropy_10": 0.9133538504038856,
"calibration/distribution_entropy_100": 0.9286687311084607,
"calibration/ece": 0.18881821655701753,
"calibration/mean_confidence": 0.6570069150219299,
"calibration/unique_confidence_per_question": 0.7671874999999999,
"calibration/unique_confidences": 294.6,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003472222222222232,
"completions/max_length": 4008.2,
"completions/max_terminated_length": 4008.2,
"completions/mean_length": 1852.3521728515625,
"completions/mean_terminated_length": 1858.8249755859374,
"completions/min_length": 0.0,
"completions/min_terminated_length": 582.2,
"epoch": 2.4871939100761242,
"grad_norm": 0.002268389565870166,
"learning_rate": 3.004807692307693e-08,
"loss": -0.0093,
"num_tokens": 3214658554.0,
"reward": 1.054084062576294,
"reward_std": 0.09638428539037705,
"rewards/accuracy_reward": 0.7923611044883728,
"rewards/brier_reward": 0.8475759506225586,
"rewards/confidence_uniqueness_reward": 0.9440353751182556,
"rewards/format_reward": 0.9964409828186035,
"rewards/frontier_coverage_0": -0.008416316658258437,
"rewards/frontier_coverage_1": -0.008416316658258437,
"rewards/frontier_coverage_10": 0.10146128982305527,
"rewards/frontier_coverage_15": 0.19958109259605408,
"rewards/frontier_coverage_20": 0.31772214770317075,
"rewards/frontier_coverage_25": 0.390204930305481,
"rewards/frontier_coverage_5": -0.0028042953927069902,
"rewards/frontier_entropy_batch_reward": -0.3362554252147675,
"signal/accuracy_reward/centered_abs_mean": 0.09750434011220932,
"signal/accuracy_reward/group_std_mean": 0.13808932304382324,
"signal/accuracy_reward/group_zero_std_frac": 0.5722222208976746,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8525140166282654,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04875217005610466,
"signal/advantage_abs_mean": 0.7589220523834228,
"signal/advantage_pre_scale_abs_mean": 0.07075085788965225,
"signal/advantage_pre_scale_std": 0.12341239303350449,
"signal/advantage_std": 0.9827309489250183,
"signal/brier_reward/centered_abs_mean": 0.09787903875112533,
"signal/brier_reward/group_std_mean": 0.12877348363399505,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1719258725643158,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009787904098629951,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020282436907291413,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02916300855576992,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03589392341673374,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020282438024878504,
"signal/format_reward/centered_abs_mean": 0.006396484514698386,
"signal/format_reward/group_std_mean": 0.012048396095633507,
"signal/format_reward/group_zero_std_frac": 0.9527777671813965,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.056607330590486525,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.003198242257349193,
"signal/frontier_coverage_0/centered_abs_mean": 0.13380325138568877,
"signal/frontier_coverage_0/group_std_mean": 0.1769975781440735,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.033473866805434224,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001913386443629861,
"signal/frontier_coverage_1/centered_abs_mean": 0.13380325138568877,
"signal/frontier_coverage_1/group_std_mean": 0.1769975781440735,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.033473866805434224,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001913386443629861,
"signal/frontier_coverage_10/centered_abs_mean": 0.07177521139383317,
"signal/frontier_coverage_10/group_std_mean": 0.08854316622018814,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.018115800246596335,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010263855452649294,
"signal/frontier_coverage_15/centered_abs_mean": 0.10529633611440659,
"signal/frontier_coverage_15/group_std_mean": 0.13048094213008882,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026583028957247734,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015057375887408853,
"signal/frontier_coverage_20/centered_abs_mean": 0.14539185762405396,
"signal/frontier_coverage_20/group_std_mean": 0.1816335141658783,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03668516799807549,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002079103607684374,
"signal/frontier_coverage_25/centered_abs_mean": 0.17040335834026338,
"signal/frontier_coverage_25/group_std_mean": 0.21383727490901946,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04297814220190048,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024367680307477713,
"signal/frontier_coverage_5/centered_abs_mean": 0.10470791161060333,
"signal/frontier_coverage_5/group_std_mean": 0.13967494815587997,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.026209026202559473,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014973230892792345,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33285818696022035,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3948968529701233,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.586955726146698,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033285819739103314,
"step": 1035
},
{
"calibration/aurc": 0.03982850277120601,
"calibration/batch_distribution_entropy": 0.9302262732060675,
"calibration/batch_entropy_100bins": 0.9370089211715671,
"calibration/batch_entropy_10bins": 0.9302262732060675,
"calibration/batch_entropy_50bins": 0.9452935932980246,
"calibration/batch_uniqueness": 0.9455695415080067,
"calibration/buffer_distribution_entropy": 0.9635296927704001,
"calibration/buffer_entropy_100bins": 0.9812695589033449,
"calibration/buffer_entropy_10bins": 0.9635296927704001,
"calibration/buffer_entropy_50bins": 0.9781823252528057,
"calibration/confidence_entropy": 0.48144740059044216,
"calibration/coverage@0%": 0.2527846128608924,
"calibration/coverage@1%": 0.3907275262467192,
"calibration/coverage@10%": 0.8651861876640421,
"calibration/coverage@15%": 0.91529281496063,
"calibration/coverage@20%": 0.9555077099737532,
"calibration/coverage@25%": 0.9942503280839894,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.779650590551181,
"calibration/distribution_entropy_10": 0.9302262732060675,
"calibration/distribution_entropy_100": 0.9370089211715671,
"calibration/ece": 0.1816495291994751,
"calibration/mean_confidence": 0.640207217027559,
"calibration/unique_confidence_per_question": 0.778125,
"calibration/unique_confidences": 298.8,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0052951388888889065,
"completions/max_length": 3966.4,
"completions/max_terminated_length": 3966.4,
"completions/mean_length": 1817.649072265625,
"completions/mean_terminated_length": 1827.4730712890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 563.0,
"epoch": 2.499193760077999,
"grad_norm": 0.002182691590860486,
"learning_rate": 0.0,
"loss": -0.005,
"num_tokens": 3238689743.0,
"reward": 1.0494820594787597,
"reward_std": 0.09593940526247025,
"rewards/accuracy_reward": 0.7894965171813965,
"rewards/brier_reward": 0.8297081470489502,
"rewards/confidence_uniqueness_reward": 0.9430023312568665,
"rewards/format_reward": 0.9947048664093018,
"rewards/frontier_coverage_0": -0.026781286112964153,
"rewards/frontier_coverage_1": -0.026781286112964153,
"rewards/frontier_coverage_10": 0.09526022970676422,
"rewards/frontier_coverage_15": 0.1908439964056015,
"rewards/frontier_coverage_20": 0.3055948555469513,
"rewards/frontier_coverage_25": 0.36994290351867676,
"rewards/frontier_coverage_5": -0.010583885153755546,
"rewards/frontier_entropy_batch_reward": -0.32723876237869265,
"signal/accuracy_reward/centered_abs_mean": 0.10203450620174408,
"signal/accuracy_reward/group_std_mean": 0.14063651114702225,
"signal/accuracy_reward/group_zero_std_frac": 0.575000011920929,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8835547685623169,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05101725310087204,
"signal/advantage_abs_mean": 0.7627329468727112,
"signal/advantage_pre_scale_abs_mean": 0.0720044381916523,
"signal/advantage_pre_scale_std": 0.12101912498474121,
"signal/advantage_std": 0.9827530026435852,
"signal/brier_reward/centered_abs_mean": 0.10083793252706527,
"signal/brier_reward/group_std_mean": 0.12944591790437698,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17524527311325072,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010083793476223946,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019239266216754914,
"signal/confidence_uniqueness_reward/group_std_mean": 0.027547023445367812,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03336756303906441,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019239266403019428,
"signal/format_reward/centered_abs_mean": 0.0053222656948491934,
"signal/format_reward/group_std_mean": 0.010332421585917472,
"signal/format_reward/group_zero_std_frac": 0.9555555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.045553749427199364,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0026611328474245967,
"signal/frontier_coverage_0/centered_abs_mean": 0.14174142628908157,
"signal/frontier_coverage_0/group_std_mean": 0.1843595564365387,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03516431301832199,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002026902325451374,
"signal/frontier_coverage_1/centered_abs_mean": 0.14174142628908157,
"signal/frontier_coverage_1/group_std_mean": 0.1843595564365387,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03516431301832199,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002026902325451374,
"signal/frontier_coverage_10/centered_abs_mean": 0.07119416147470474,
"signal/frontier_coverage_10/group_std_mean": 0.0873028039932251,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01772012934088707,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010180765297263862,
"signal/frontier_coverage_15/centered_abs_mean": 0.10372219681739807,
"signal/frontier_coverage_15/group_std_mean": 0.1279784396290779,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02581692300736904,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014832273591309786,
"signal/frontier_coverage_20/centered_abs_mean": 0.14510032236576081,
"signal/frontier_coverage_20/group_std_mean": 0.1804075062274933,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.036104710400104524,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020749345421791077,
"signal/frontier_coverage_25/centered_abs_mean": 0.1685394436120987,
"signal/frontier_coverage_25/group_std_mean": 0.21030722558498383,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04193039983510971,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002410113997757435,
"signal/frontier_coverage_5/centered_abs_mean": 0.11676243543624878,
"signal/frontier_coverage_5/group_std_mean": 0.15255253612995148,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02898463122546673,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016697028186172247,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3297681212425232,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39378581047058103,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5740349888801575,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03297681398689747,
"step": 1040
},
{
"epoch": 2.499193760077999,
"step": 1040,
"total_flos": 0.0,
"train_loss": -0.0007628972309668405,
"train_runtime": 27173.5486,
"train_samples_per_second": 2.76,
"train_steps_per_second": 0.038
}
],
"logging_steps": 5,
"max_steps": 1040,
"num_input_tokens_seen": 3238689743,
"num_train_epochs": 3,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}