Files
RLCR-v4-ks-uniqueness-cov0-…/trainer_state.json
ModelHub XC f21693b838 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-uniqueness-cov0-entropy100-noece-noaurc-scaletrue-cold-math
Source: Original Platform
2026-05-28 15:11:52 +08:00

5721 lines
358 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.49919376007799904,
"eval_steps": 50,
"global_step": 208,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.4883207070890415,
"calibration/batch_distribution_entropy": 0.2739739421553503,
"calibration/confidence_entropy": 0.21793248029268142,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4604362091577833,
"calibration/mean_confidence": 0.9143221468537565,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.018576388888888906,
"completions/max_length": 3895.6,
"completions/max_terminated_length": 3895.6,
"completions/mean_length": 514.4408813476563,
"completions/mean_terminated_length": 524.181884765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.011999850001874977,
"grad_norm": 0.0081259123980999,
"learning_rate": 5.952380952380953e-07,
"loss": 0.0056,
"num_tokens": 9040567.0,
"reward": 0.4569155514240265,
"reward_std": 0.41827074289321897,
"rewards/accuracy_reward": 0.2575520783662796,
"rewards/brier_reward": 0.30908964276313783,
"rewards/confidence_uniqueness_reward": 0.28769826889038086,
"rewards/format_reward": 0.5966145753860473,
"rewards/frontier_coverage_0": 0.27184249460697174,
"rewards/frontier_coverage_1": 0.27184249460697174,
"rewards/frontier_coverage_10": 0.27184249460697174,
"rewards/frontier_coverage_15": 0.27184249460697174,
"rewards/frontier_coverage_20": 0.27184249460697174,
"rewards/frontier_coverage_25": 0.27184249460697174,
"rewards/frontier_coverage_5": 0.27184249460697174,
"rewards/frontier_entropy_batch_reward": -0.5705800533294678,
"signal/accuracy_reward/centered_abs_mean": 0.30725369453430174,
"signal/accuracy_reward/group_std_mean": 0.3699012637138367,
"signal/accuracy_reward/group_zero_std_frac": 0.07500000149011612,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.3921299993991852,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15362684726715087,
"signal/advantage_abs_mean": 0.848686158657074,
"signal/advantage_pre_scale_abs_mean": 0.35823245644569396,
"signal/advantage_pre_scale_std": 0.42261629104614257,
"signal/advantage_std": 0.9842132687568664,
"signal/brier_reward/centered_abs_mean": 0.3175659000873566,
"signal/brier_reward/group_std_mean": 0.37283719182014463,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08106742650270463,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.03175659067928791,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.23240519165992737,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2853622674942017,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05935205966234207,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02324051931500435,
"signal/format_reward/centered_abs_mean": 0.44161783456802367,
"signal/format_reward/group_std_mean": 0.4756269872188568,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.5638803482055664,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.22080891728401184,
"signal/frontier_coverage_0/centered_abs_mean": 0.3085132300853729,
"signal/frontier_coverage_0/group_std_mean": 0.36870989203453064,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.011261269636452197,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004411739017814398,
"signal/frontier_coverage_1/centered_abs_mean": 0.3085132300853729,
"signal/frontier_coverage_1/group_std_mean": 0.36870989203453064,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.011261269636452197,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004411739017814398,
"signal/frontier_coverage_10/centered_abs_mean": 0.3085132300853729,
"signal/frontier_coverage_10/group_std_mean": 0.36870989203453064,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.011261269636452197,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004411739017814398,
"signal/frontier_coverage_15/centered_abs_mean": 0.3085132300853729,
"signal/frontier_coverage_15/group_std_mean": 0.36870989203453064,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011261269636452197,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004411739017814398,
"signal/frontier_coverage_20/centered_abs_mean": 0.3085132300853729,
"signal/frontier_coverage_20/group_std_mean": 0.36870989203453064,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.011261269636452197,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004411739017814398,
"signal/frontier_coverage_25/centered_abs_mean": 0.3085132300853729,
"signal/frontier_coverage_25/group_std_mean": 0.36870989203453064,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.011261269636452197,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004411739017814398,
"signal/frontier_coverage_5/centered_abs_mean": 0.3085132300853729,
"signal/frontier_coverage_5/group_std_mean": 0.36870989203453064,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.011261269636452197,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004411739017814398,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4511567711830139,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.48259199857711793,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.1152160570025444,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.045115678757429126,
"step": 5
},
{
"calibration/aurc": 0.5107523789601409,
"calibration/batch_distribution_entropy": 0.24915467457321486,
"calibration/confidence_entropy": 0.21520335761112702,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4658758918099065,
"calibration/mean_confidence": 0.922980520374389,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01883680555555558,
"completions/max_length": 3971.0,
"completions/max_terminated_length": 3971.0,
"completions/mean_length": 469.39539794921876,
"completions/mean_terminated_length": 478.6153076171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 22.4,
"epoch": 0.023999700003749954,
"grad_norm": 0.05667172372341156,
"learning_rate": 1.1904761904761906e-06,
"loss": 0.0066,
"num_tokens": 17530722.0,
"reward": 0.5542843520641327,
"reward_std": 0.3886994063854218,
"rewards/accuracy_reward": 0.301215273141861,
"rewards/brier_reward": 0.3664989948272705,
"rewards/confidence_uniqueness_reward": 0.3651871979236603,
"rewards/format_reward": 0.7378472208976745,
"rewards/frontier_coverage_0": 0.3178509533405304,
"rewards/frontier_coverage_1": 0.3178509533405304,
"rewards/frontier_coverage_10": 0.3178509533405304,
"rewards/frontier_coverage_15": 0.3178509533405304,
"rewards/frontier_coverage_20": 0.3178509533405304,
"rewards/frontier_coverage_25": 0.3178509533405304,
"rewards/frontier_coverage_5": 0.3178509533405304,
"rewards/frontier_entropy_batch_reward": -0.7023240089416504,
"signal/accuracy_reward/centered_abs_mean": 0.3270073771476746,
"signal/accuracy_reward/group_std_mean": 0.38548147678375244,
"signal/accuracy_reward/group_zero_std_frac": 0.06666666865348816,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.4557223439216614,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1635036885738373,
"signal/advantage_abs_mean": 0.8083321452140808,
"signal/advantage_pre_scale_abs_mean": 0.32132325768470765,
"signal/advantage_pre_scale_std": 0.392959201335907,
"signal/advantage_std": 0.984190571308136,
"signal/brier_reward/centered_abs_mean": 0.32042253017425537,
"signal/brier_reward/group_std_mean": 0.3735620677471161,
"signal/brier_reward/group_zero_std_frac": 0.002777777798473835,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08921760171651841,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.032042254135012625,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2168998122215271,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2739565551280975,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.060243000835180284,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02168998159468174,
"signal/format_reward/centered_abs_mean": 0.3335828959941864,
"signal/format_reward/group_std_mean": 0.40515110492706297,
"signal/format_reward/group_zero_std_frac": 0.00555555559694767,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.45944651365280154,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.1667914479970932,
"signal/frontier_coverage_0/centered_abs_mean": 0.3215973138809204,
"signal/frontier_coverage_0/group_std_mean": 0.37801494002342223,
"signal/frontier_coverage_0/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01281338632106781,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0045988415367901325,
"signal/frontier_coverage_1/centered_abs_mean": 0.3215973138809204,
"signal/frontier_coverage_1/group_std_mean": 0.37801494002342223,
"signal/frontier_coverage_1/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01281338632106781,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0045988415367901325,
"signal/frontier_coverage_10/centered_abs_mean": 0.3215973138809204,
"signal/frontier_coverage_10/group_std_mean": 0.37801494002342223,
"signal/frontier_coverage_10/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01281338632106781,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0045988415367901325,
"signal/frontier_coverage_15/centered_abs_mean": 0.3215973138809204,
"signal/frontier_coverage_15/group_std_mean": 0.37801494002342223,
"signal/frontier_coverage_15/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01281338632106781,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0045988415367901325,
"signal/frontier_coverage_20/centered_abs_mean": 0.3215973138809204,
"signal/frontier_coverage_20/group_std_mean": 0.37801494002342223,
"signal/frontier_coverage_20/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01281338632106781,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0045988415367901325,
"signal/frontier_coverage_25/centered_abs_mean": 0.3215973138809204,
"signal/frontier_coverage_25/group_std_mean": 0.37801494002342223,
"signal/frontier_coverage_25/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01281338632106781,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0045988415367901325,
"signal/frontier_coverage_5/centered_abs_mean": 0.3215973138809204,
"signal/frontier_coverage_5/group_std_mean": 0.37801494002342223,
"signal/frontier_coverage_5/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01281338632106781,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0045988415367901325,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36690880060195924,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4326904654502869,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.10138487070798874,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03669087961316109,
"step": 10
},
{
"calibration/aurc": 0.5439928649912942,
"calibration/batch_distribution_entropy": 0.3275892998380167,
"calibration/confidence_entropy": 0.26077720370647295,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4911188693830021,
"calibration/mean_confidence": 0.9035915660059434,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009895833333333326,
"completions/max_length": 3997.8,
"completions/max_terminated_length": 3997.8,
"completions/mean_length": 426.6123352050781,
"completions/mean_terminated_length": 430.9161010742188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 32.4,
"epoch": 0.03599955000562493,
"grad_norm": 0.017714520916342735,
"learning_rate": 1.7857142857142859e-06,
"loss": -0.0218,
"num_tokens": 25547280.0,
"reward": 0.6818203568458557,
"reward_std": 0.2930204153060913,
"rewards/accuracy_reward": 0.3302083373069763,
"rewards/brier_reward": 0.44146730899810793,
"rewards/confidence_uniqueness_reward": 0.5243119597434998,
"rewards/format_reward": 0.9497395992279053,
"rewards/frontier_coverage_0": 0.3611275374889374,
"rewards/frontier_coverage_1": 0.3611275374889374,
"rewards/frontier_coverage_10": 0.3611275374889374,
"rewards/frontier_coverage_15": 0.3611275374889374,
"rewards/frontier_coverage_20": 0.3611275374889374,
"rewards/frontier_coverage_25": 0.3611275374889374,
"rewards/frontier_coverage_5": 0.3611275374889374,
"rewards/frontier_entropy_batch_reward": -0.9088038682937623,
"signal/accuracy_reward/centered_abs_mean": 0.31500651240348815,
"signal/accuracy_reward/group_std_mean": 0.37460089921951295,
"signal/accuracy_reward/group_zero_std_frac": 0.09166666865348816,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6708433270454407,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15750325620174407,
"signal/advantage_abs_mean": 0.769465982913971,
"signal/advantage_pre_scale_abs_mean": 0.23625607192516326,
"signal/advantage_pre_scale_std": 0.30175902843475344,
"signal/advantage_std": 0.9840420842170715,
"signal/brier_reward/centered_abs_mean": 0.2925845801830292,
"signal/brier_reward/group_std_mean": 0.3448193073272705,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.12465540021657943,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02925845831632614,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.19031396508216858,
"signal/confidence_uniqueness_reward/group_std_mean": 0.23621676564216615,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0817145824432373,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01903139688074589,
"signal/format_reward/centered_abs_mean": 0.08739691749215125,
"signal/format_reward/group_std_mean": 0.15843217223882675,
"signal/format_reward/group_zero_std_frac": 0.3861111253499985,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.17899880260229112,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.04369845874607563,
"signal/frontier_coverage_0/centered_abs_mean": 0.30582007169723513,
"signal/frontier_coverage_0/group_std_mean": 0.3615089595317841,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.018638250604271888,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004373226827010512,
"signal/frontier_coverage_1/centered_abs_mean": 0.30582007169723513,
"signal/frontier_coverage_1/group_std_mean": 0.3615089595317841,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.018638250604271888,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004373226827010512,
"signal/frontier_coverage_10/centered_abs_mean": 0.30582007169723513,
"signal/frontier_coverage_10/group_std_mean": 0.3615089595317841,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.018638250604271888,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004373226827010512,
"signal/frontier_coverage_15/centered_abs_mean": 0.30582007169723513,
"signal/frontier_coverage_15/group_std_mean": 0.3615089595317841,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018638250604271888,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004373226827010512,
"signal/frontier_coverage_20/centered_abs_mean": 0.30582007169723513,
"signal/frontier_coverage_20/group_std_mean": 0.3615089595317841,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018638250604271888,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004373226827010512,
"signal/frontier_coverage_25/centered_abs_mean": 0.30582007169723513,
"signal/frontier_coverage_25/group_std_mean": 0.3615089595317841,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018638250604271888,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004373226827010512,
"signal/frontier_coverage_5/centered_abs_mean": 0.30582007169723513,
"signal/frontier_coverage_5/group_std_mean": 0.3615089595317841,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.018638250604271888,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004373226827010512,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.15383070558309556,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.25853142738342283,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1277777798473835,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.06458796337246894,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015383070893585682,
"step": 15
},
{
"calibration/aurc": 0.4556803994965267,
"calibration/batch_distribution_entropy": 0.5350678484304228,
"calibration/buffer_distribution_entropy": 0.3417969188964092,
"calibration/confidence_entropy": 0.3897717372305213,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0433420365535248,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.3313420910147438,
"calibration/mean_confidence": 0.8364994562189102,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010156249999999978,
"completions/max_length": 3824.2,
"completions/max_terminated_length": 3824.2,
"completions/mean_length": 465.0840270996094,
"completions/mean_terminated_length": 469.90997924804685,
"completions/min_length": 0.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.04799940000749991,
"grad_norm": 0.014085509814321995,
"learning_rate": 2.380952380952381e-06,
"loss": -0.0318,
"num_tokens": 34018744.0,
"reward": 0.7619543671607971,
"reward_std": 0.23002811074256896,
"rewards/accuracy_reward": 0.43802083730697633,
"rewards/brier_reward": 0.5893322110176087,
"rewards/confidence_uniqueness_reward": 0.653409230709076,
"rewards/format_reward": 0.985850703716278,
"rewards/frontier_coverage_0": 0.19372253511101006,
"rewards/frontier_coverage_1": 0.19372253511101006,
"rewards/frontier_coverage_10": 0.19372253511101006,
"rewards/frontier_coverage_15": 0.19372253511101006,
"rewards/frontier_coverage_20": 0.19372253511101006,
"rewards/frontier_coverage_25": 0.19372253511101006,
"rewards/frontier_coverage_5": 0.19372253511101006,
"rewards/frontier_entropy_batch_reward": -0.9364717721939086,
"signal/accuracy_reward/centered_abs_mean": 0.2878797709941864,
"signal/accuracy_reward/group_std_mean": 0.35460472106933594,
"signal/accuracy_reward/group_zero_std_frac": 0.09444444626569748,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9924409985542297,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1439398854970932,
"signal/advantage_abs_mean": 0.7534375190734863,
"signal/advantage_pre_scale_abs_mean": 0.1824011266231537,
"signal/advantage_pre_scale_std": 0.23983034789562224,
"signal/advantage_std": 0.9837870955467224,
"signal/brier_reward/centered_abs_mean": 0.23643364608287812,
"signal/brier_reward/group_std_mean": 0.2890482544898987,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1609581083059311,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.023643364757299425,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.15150568187236785,
"signal/confidence_uniqueness_reward/group_std_mean": 0.18613037019968032,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.09755237996578217,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015150568448007107,
"signal/format_reward/centered_abs_mean": 0.02616644911468029,
"signal/format_reward/group_std_mean": 0.058222611993551256,
"signal/format_reward/group_zero_std_frac": 0.7305555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08630450516939163,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013083224557340145,
"signal/frontier_coverage_0/centered_abs_mean": 0.14219243675470353,
"signal/frontier_coverage_0/group_std_mean": 0.18434092849493028,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.012311214115470648,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002033351955469698,
"signal/frontier_coverage_1/centered_abs_mean": 0.14219243675470353,
"signal/frontier_coverage_1/group_std_mean": 0.18434092849493028,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.012311214115470648,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002033351955469698,
"signal/frontier_coverage_10/centered_abs_mean": 0.14219243675470353,
"signal/frontier_coverage_10/group_std_mean": 0.18434092849493028,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012311214115470648,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002033351955469698,
"signal/frontier_coverage_15/centered_abs_mean": 0.14219243675470353,
"signal/frontier_coverage_15/group_std_mean": 0.18434092849493028,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012311214115470648,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002033351955469698,
"signal/frontier_coverage_20/centered_abs_mean": 0.14219243675470353,
"signal/frontier_coverage_20/group_std_mean": 0.18434092849493028,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012311214115470648,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002033351955469698,
"signal/frontier_coverage_25/centered_abs_mean": 0.14219243675470353,
"signal/frontier_coverage_25/group_std_mean": 0.18434092849493028,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.012311214115470648,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002033351955469698,
"signal/frontier_coverage_5/centered_abs_mean": 0.14219243675470353,
"signal/frontier_coverage_5/group_std_mean": 0.18434092849493028,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.012311214115470648,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002033351955469698,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.11082728952169418,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.21301989257335663,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.25833334028720856,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.07670077979564667,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011082728952169418,
"step": 20
},
{
"calibration/aurc": 0.32297898002690983,
"calibration/batch_distribution_entropy": 0.6393962661945383,
"calibration/buffer_distribution_entropy": 0.475857881253323,
"calibration/confidence_entropy": 0.5880384168326668,
"calibration/coverage@0%": 0.00737848722179194,
"calibration/coverage@1%": 0.00737848722179194,
"calibration/coverage@10%": 0.021514612876242203,
"calibration/coverage@15%": 0.053983590903203496,
"calibration/coverage@20%": 0.21906205622592817,
"calibration/coverage@25%": 0.3800217791924946,
"calibration/coverage@30%": 0.4740219432344893,
"calibration/coverage@5%": 0.00737848722179194,
"calibration/ece": 0.13618263020369642,
"calibration/mean_confidence": 0.6823366961993423,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0109375,
"completions/max_length": 3916.0,
"completions/max_terminated_length": 3916.0,
"completions/mean_length": 551.5579833984375,
"completions/mean_terminated_length": 557.6826171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 100.2,
"epoch": 0.05999925000937488,
"grad_norm": 0.04915325343608856,
"learning_rate": 2.9761904761904763e-06,
"loss": -0.0291,
"num_tokens": 43497140.0,
"reward": 0.8319814205169678,
"reward_std": 0.1857275605201721,
"rewards/accuracy_reward": 0.5822916686534881,
"rewards/brier_reward": 0.7326545953750611,
"rewards/confidence_uniqueness_reward": 0.6608519792556763,
"rewards/format_reward": 0.9865451455116272,
"rewards/frontier_coverage_0": -0.014849835354834796,
"rewards/frontier_coverage_1": -0.014849835354834796,
"rewards/frontier_coverage_10": -0.014849835354834796,
"rewards/frontier_coverage_15": -0.014849835354834796,
"rewards/frontier_coverage_20": -0.014849835354834796,
"rewards/frontier_coverage_25": -0.014849835354834796,
"rewards/frontier_coverage_5": -0.014849835354834796,
"rewards/frontier_entropy_batch_reward": -0.9030117869377137,
"signal/accuracy_reward/centered_abs_mean": 0.24696180522441863,
"signal/accuracy_reward/group_std_mean": 0.3145732879638672,
"signal/accuracy_reward/group_zero_std_frac": 0.15555555671453475,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9943392634391784,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.12348090261220931,
"signal/advantage_abs_mean": 0.7143722534179687,
"signal/advantage_pre_scale_abs_mean": 0.14123885333538055,
"signal/advantage_pre_scale_std": 0.19914124310016632,
"signal/advantage_std": 0.9836650371551514,
"signal/brier_reward/centered_abs_mean": 0.1310290887951851,
"signal/brier_reward/group_std_mean": 0.1702386736869812,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.10493465960025787,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013102908991277218,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1781868025660515,
"signal/confidence_uniqueness_reward/group_std_mean": 0.21007861495018004,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.14442408829927444,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01781868040561676,
"signal/format_reward/centered_abs_mean": 0.02363823801279068,
"signal/format_reward/group_std_mean": 0.04480181857943535,
"signal/format_reward/group_zero_std_frac": 0.8166666626930237,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09280302375555038,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01181911900639534,
"signal/frontier_coverage_0/centered_abs_mean": 0.08157578110694885,
"signal/frontier_coverage_0/group_std_mean": 0.10721786618232727,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.009440916776657104,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0011665337020531296,
"signal/frontier_coverage_1/centered_abs_mean": 0.08157578110694885,
"signal/frontier_coverage_1/group_std_mean": 0.10721786618232727,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.009440916776657104,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0011665337020531296,
"signal/frontier_coverage_10/centered_abs_mean": 0.08157578110694885,
"signal/frontier_coverage_10/group_std_mean": 0.10721786618232727,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.009440916776657104,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011665337020531296,
"signal/frontier_coverage_15/centered_abs_mean": 0.08157578110694885,
"signal/frontier_coverage_15/group_std_mean": 0.10721786618232727,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.009440916776657104,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011665337020531296,
"signal/frontier_coverage_20/centered_abs_mean": 0.08157578110694885,
"signal/frontier_coverage_20/group_std_mean": 0.10721786618232727,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.009440916776657104,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011665337020531296,
"signal/frontier_coverage_25/centered_abs_mean": 0.08157578110694885,
"signal/frontier_coverage_25/group_std_mean": 0.10721786618232727,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.009440916776657104,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011665337020531296,
"signal/frontier_coverage_5/centered_abs_mean": 0.08157578110694885,
"signal/frontier_coverage_5/group_std_mean": 0.10721786618232727,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.009440916776657104,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0011665337020531296,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.16028570830821992,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.27135405838489535,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.15833333898335694,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.12926736772060393,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.016028571128845214,
"step": 25
},
{
"calibration/aurc": 0.28283994378691124,
"calibration/batch_distribution_entropy": 0.8098021020102317,
"calibration/buffer_distribution_entropy": 0.6103832596029344,
"calibration/confidence_entropy": 0.5821678574801844,
"calibration/coverage@0%": 0.006513023807924029,
"calibration/coverage@1%": 0.006513023807924029,
"calibration/coverage@10%": 0.023857197249658447,
"calibration/coverage@15%": 0.03540575368010464,
"calibration/coverage@20%": 0.18533627087873336,
"calibration/coverage@25%": 0.4079531033741429,
"calibration/coverage@30%": 0.5827169258003846,
"calibration/coverage@5%": 0.01681112678895384,
"calibration/ece": 0.1065982830413967,
"calibration/mean_confidence": 0.6392839596712477,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015885416666666673,
"completions/max_length": 3756.2,
"completions/max_terminated_length": 3756.2,
"completions/mean_length": 629.6125,
"completions/mean_terminated_length": 639.7435302734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 173.6,
"epoch": 0.07199910001124986,
"grad_norm": 0.0025920316111296415,
"learning_rate": 3.5714285714285718e-06,
"loss": -0.0323,
"num_tokens": 53860196.0,
"reward": 0.8962351679801941,
"reward_std": 0.18339000940322875,
"rewards/accuracy_reward": 0.6081597208976746,
"rewards/brier_reward": 0.7350787162780762,
"rewards/confidence_uniqueness_reward": 0.8857907652854919,
"rewards/format_reward": 0.9823784708976746,
"rewards/frontier_coverage_0": -0.024395102635025978,
"rewards/frontier_coverage_1": -0.024395102635025978,
"rewards/frontier_coverage_10": -0.024395102635025978,
"rewards/frontier_coverage_15": -0.024395102635025978,
"rewards/frontier_coverage_20": -0.024395102635025978,
"rewards/frontier_coverage_25": -0.024395102635025978,
"rewards/frontier_coverage_5": -0.024395102635025978,
"rewards/frontier_entropy_batch_reward": -0.5867892920970916,
"signal/accuracy_reward/centered_abs_mean": 0.2335611939430237,
"signal/accuracy_reward/group_std_mean": 0.29245399236679076,
"signal/accuracy_reward/group_zero_std_frac": 0.23055555522441865,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.932023000717163,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11678059697151184,
"signal/advantage_abs_mean": 0.7472962260246276,
"signal/advantage_pre_scale_abs_mean": 0.1410281091928482,
"signal/advantage_pre_scale_std": 0.2018715351819992,
"signal/advantage_std": 0.9836687922477723,
"signal/brier_reward/centered_abs_mean": 0.1558063119649887,
"signal/brier_reward/group_std_mean": 0.19716133773326874,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.12757135629653932,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015580631978809833,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07879094183444976,
"signal/confidence_uniqueness_reward/group_std_mean": 0.10910149812698364,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.060947873443365094,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00787909417413175,
"signal/format_reward/centered_abs_mean": 0.03021375872194767,
"signal/format_reward/group_std_mean": 0.056160366535186766,
"signal/format_reward/group_zero_std_frac": 0.7750000119209289,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.12186049222946167,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.015106879360973834,
"signal/frontier_coverage_0/centered_abs_mean": 0.12939045429229737,
"signal/frontier_coverage_0/group_std_mean": 0.17340194284915925,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015195189043879509,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018502835649996997,
"signal/frontier_coverage_1/centered_abs_mean": 0.12939045429229737,
"signal/frontier_coverage_1/group_std_mean": 0.17340194284915925,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.015195189043879509,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018502835649996997,
"signal/frontier_coverage_10/centered_abs_mean": 0.12939045429229737,
"signal/frontier_coverage_10/group_std_mean": 0.17340194284915925,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015195189043879509,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018502835649996997,
"signal/frontier_coverage_15/centered_abs_mean": 0.12939045429229737,
"signal/frontier_coverage_15/group_std_mean": 0.17340194284915925,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015195189043879509,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018502835649996997,
"signal/frontier_coverage_20/centered_abs_mean": 0.12939045429229737,
"signal/frontier_coverage_20/group_std_mean": 0.17340194284915925,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015195189043879509,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018502835649996997,
"signal/frontier_coverage_25/centered_abs_mean": 0.12939045429229737,
"signal/frontier_coverage_25/group_std_mean": 0.17340194284915925,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015195189043879509,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018502835649996997,
"signal/frontier_coverage_5/centered_abs_mean": 0.12939045429229737,
"signal/frontier_coverage_5/group_std_mean": 0.17340194284915925,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.015195189043879509,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018502835649996997,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4059493899345398,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4744054675102234,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.32940598130226134,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04059493914246559,
"step": 30
},
{
"calibration/aurc": 0.26349164441341744,
"calibration/batch_distribution_entropy": 0.9722011088416853,
"calibration/buffer_distribution_entropy": 0.7147490484587777,
"calibration/confidence_entropy": 0.4716542473597789,
"calibration/coverage@0%": 0.010033409527924178,
"calibration/coverage@1%": 0.010033409527924178,
"calibration/coverage@10%": 0.04262926925598449,
"calibration/coverage@15%": 0.07320457015039024,
"calibration/coverage@20%": 0.11014567619634501,
"calibration/coverage@25%": 0.57302513015744,
"calibration/coverage@30%": 0.8417191254787083,
"calibration/coverage@5%": 0.010033409527924178,
"calibration/ece": 0.2408688157230487,
"calibration/mean_confidence": 0.5653689750040694,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02152777777777779,
"completions/max_length": 3726.4,
"completions/max_terminated_length": 3726.4,
"completions/mean_length": 640.2627685546875,
"completions/mean_terminated_length": 654.4486206054687,
"completions/min_length": 0.0,
"completions/min_terminated_length": 180.8,
"epoch": 0.08399895001312484,
"grad_norm": 0.00288687227293849,
"learning_rate": 4.166666666666667e-06,
"loss": -0.0519,
"num_tokens": 64313463.0,
"reward": 0.9324461936950683,
"reward_std": 0.1782814681529999,
"rewards/accuracy_reward": 0.6236111164093018,
"rewards/brier_reward": 0.6862263441085815,
"rewards/confidence_uniqueness_reward": 0.9285731554031372,
"rewards/format_reward": 0.9761284589767456,
"rewards/frontier_coverage_0": -0.047293629869818686,
"rewards/frontier_coverage_1": -0.047293629869818686,
"rewards/frontier_coverage_10": -0.047293629869818686,
"rewards/frontier_coverage_15": -0.047293629869818686,
"rewards/frontier_coverage_20": -0.047293629869818686,
"rewards/frontier_coverage_25": -0.047293629869818686,
"rewards/frontier_coverage_5": -0.047293629869818686,
"rewards/frontier_entropy_batch_reward": -0.2416945517063141,
"signal/accuracy_reward/centered_abs_mean": 0.2194552928209305,
"signal/accuracy_reward/group_std_mean": 0.28137104511260985,
"signal/accuracy_reward/group_zero_std_frac": 0.23055555522441865,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8508251547813416,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10972764641046524,
"signal/advantage_abs_mean": 0.747930896282196,
"signal/advantage_pre_scale_abs_mean": 0.13469484448432922,
"signal/advantage_pre_scale_std": 0.19468034505844117,
"signal/advantage_std": 0.983700430393219,
"signal/brier_reward/centered_abs_mean": 0.2539998531341553,
"signal/brier_reward/group_std_mean": 0.30160382986068723,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19768215715885162,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.025399985909461974,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.047507094591856,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08019827008247375,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03670351468026638,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004750709515064955,
"signal/format_reward/centered_abs_mean": 0.0376356340944767,
"signal/format_reward/group_std_mean": 0.06864920854568482,
"signal/format_reward/group_zero_std_frac": 0.7277777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.14402690380811692,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01881781704723835,
"signal/frontier_coverage_0/centered_abs_mean": 0.2695829331874847,
"signal/frontier_coverage_0/group_std_mean": 0.3519932210445404,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.029944488778710365,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0038550359196960924,
"signal/frontier_coverage_1/centered_abs_mean": 0.2695829331874847,
"signal/frontier_coverage_1/group_std_mean": 0.3519932210445404,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.029944488778710365,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038550359196960924,
"signal/frontier_coverage_10/centered_abs_mean": 0.2695829331874847,
"signal/frontier_coverage_10/group_std_mean": 0.3519932210445404,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.029944488778710365,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038550359196960924,
"signal/frontier_coverage_15/centered_abs_mean": 0.2695829331874847,
"signal/frontier_coverage_15/group_std_mean": 0.3519932210445404,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.029944488778710365,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038550359196960924,
"signal/frontier_coverage_20/centered_abs_mean": 0.2695829331874847,
"signal/frontier_coverage_20/group_std_mean": 0.3519932210445404,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.029944488778710365,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038550359196960924,
"signal/frontier_coverage_25/centered_abs_mean": 0.2695829331874847,
"signal/frontier_coverage_25/group_std_mean": 0.3519932210445404,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029944488778710365,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038550359196960924,
"signal/frontier_coverage_5/centered_abs_mean": 0.2695829331874847,
"signal/frontier_coverage_5/group_std_mean": 0.3519932210445404,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.029944488778710365,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038550359196960924,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3358907103538513,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40937405824661255,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.26255030035972593,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033589070290327074,
"step": 35
},
{
"calibration/aurc": 0.2812089130282969,
"calibration/batch_distribution_entropy": 0.9378820973722213,
"calibration/buffer_distribution_entropy": 0.7788346026193121,
"calibration/confidence_entropy": 0.5052747431569691,
"calibration/coverage@0%": 0.011617889637609121,
"calibration/coverage@1%": 0.011617889637609121,
"calibration/coverage@10%": 0.02006814162348116,
"calibration/coverage@15%": 0.04699503081949964,
"calibration/coverage@20%": 0.21852156777167192,
"calibration/coverage@25%": 0.3002575847416842,
"calibration/coverage@30%": 0.6493828972901323,
"calibration/coverage@5%": 0.011617889637609121,
"calibration/ece": 0.18469349731283605,
"calibration/mean_confidence": 0.6265628071196891,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017881944444444443,
"completions/max_length": 3601.8,
"completions/max_terminated_length": 3601.8,
"completions/mean_length": 686.0500854492187,
"completions/mean_terminated_length": 698.4910034179687,
"completions/min_length": 0.0,
"completions/min_terminated_length": 203.2,
"epoch": 0.09599880001499982,
"grad_norm": 0.0052315001375973225,
"learning_rate": 4.761904761904762e-06,
"loss": -0.0494,
"num_tokens": 75336280.0,
"reward": 0.9460026144981384,
"reward_std": 0.16873830258846284,
"rewards/accuracy_reward": 0.6516493201255799,
"rewards/brier_reward": 0.7267241477966309,
"rewards/confidence_uniqueness_reward": 0.9280878067016601,
"rewards/format_reward": 0.9811632037162781,
"rewards/frontier_coverage_0": -0.03641742318868637,
"rewards/frontier_coverage_1": -0.03641742318868637,
"rewards/frontier_coverage_10": -0.03641742318868637,
"rewards/frontier_coverage_15": -0.03641742318868637,
"rewards/frontier_coverage_20": -0.03641742318868637,
"rewards/frontier_coverage_25": -0.03641742318868637,
"rewards/frontier_coverage_5": -0.03641742318868637,
"rewards/frontier_entropy_batch_reward": -0.3223945081233978,
"signal/accuracy_reward/centered_abs_mean": 0.19728189706802368,
"signal/accuracy_reward/group_std_mean": 0.257595032453537,
"signal/accuracy_reward/group_zero_std_frac": 0.2861111104488373,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9428304195404053,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09864094853401184,
"signal/advantage_abs_mean": 0.7440566301345826,
"signal/advantage_pre_scale_abs_mean": 0.1264283075928688,
"signal/advantage_pre_scale_std": 0.19226027727127076,
"signal/advantage_std": 0.9835219383239746,
"signal/brier_reward/centered_abs_mean": 0.2110671579837799,
"signal/brier_reward/group_std_mean": 0.2574485570192337,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20208889842033387,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.021106715872883798,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04523418918251991,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07450791597366332,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.043287652730941775,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045234191231429575,
"signal/format_reward/centered_abs_mean": 0.03200412429869175,
"signal/format_reward/group_std_mean": 0.05935907438397407,
"signal/format_reward/group_zero_std_frac": 0.7611111164093017,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.15341382324695588,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.016002062149345873,
"signal/frontier_coverage_0/centered_abs_mean": 0.1926664799451828,
"signal/frontier_coverage_0/group_std_mean": 0.26043030619621277,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02639569416642189,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002755130687728524,
"signal/frontier_coverage_1/centered_abs_mean": 0.1926664799451828,
"signal/frontier_coverage_1/group_std_mean": 0.26043030619621277,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02639569416642189,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002755130687728524,
"signal/frontier_coverage_10/centered_abs_mean": 0.1926664799451828,
"signal/frontier_coverage_10/group_std_mean": 0.26043030619621277,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02639569416642189,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002755130687728524,
"signal/frontier_coverage_15/centered_abs_mean": 0.1926664799451828,
"signal/frontier_coverage_15/group_std_mean": 0.26043030619621277,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02639569416642189,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002755130687728524,
"signal/frontier_coverage_20/centered_abs_mean": 0.1926664799451828,
"signal/frontier_coverage_20/group_std_mean": 0.26043030619621277,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02639569416642189,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002755130687728524,
"signal/frontier_coverage_25/centered_abs_mean": 0.1926664799451828,
"signal/frontier_coverage_25/group_std_mean": 0.26043030619621277,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02639569416642189,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002755130687728524,
"signal/frontier_coverage_5/centered_abs_mean": 0.1926664799451828,
"signal/frontier_coverage_5/group_std_mean": 0.26043030619621277,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02639569416642189,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002755130687728524,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38592681884765623,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4502368450164795,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.36914966702461244,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038592683523893355,
"step": 40
},
{
"calibration/aurc": 0.18182826496863658,
"calibration/batch_distribution_entropy": 0.9293780132057007,
"calibration/buffer_distribution_entropy": 0.8135788097220038,
"calibration/confidence_entropy": 0.4995505573094066,
"calibration/coverage@0%": 0.014873545187723889,
"calibration/coverage@1%": 0.014873545187723889,
"calibration/coverage@10%": 0.13937415970478578,
"calibration/coverage@15%": 0.3561157115232597,
"calibration/coverage@20%": 0.6580942309690091,
"calibration/coverage@25%": 0.9673320588192078,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.05969262413509231,
"calibration/ece": 0.16737471673810952,
"calibration/mean_confidence": 0.626975023284362,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013888888888888905,
"completions/max_length": 3784.0,
"completions/max_terminated_length": 3784.0,
"completions/mean_length": 739.9791625976562,
"completions/mean_terminated_length": 750.4744018554687,
"completions/min_length": 0.0,
"completions/min_terminated_length": 252.6,
"epoch": 0.1079986500168748,
"grad_norm": 0.002621802967041731,
"learning_rate": 4.909638554216868e-06,
"loss": -0.0319,
"num_tokens": 86996104.0,
"reward": 0.9631305456161499,
"reward_std": 0.1516349971294403,
"rewards/accuracy_reward": 0.66796875,
"rewards/brier_reward": 0.762067437171936,
"rewards/confidence_uniqueness_reward": 0.9327586412429809,
"rewards/format_reward": 0.98515625,
"rewards/frontier_coverage_0": -0.008925668522715568,
"rewards/frontier_coverage_1": -0.008925668522715568,
"rewards/frontier_coverage_10": -0.008925668522715568,
"rewards/frontier_coverage_15": -0.008925668522715568,
"rewards/frontier_coverage_20": -0.008925668522715568,
"rewards/frontier_coverage_25": -0.008925668522715568,
"rewards/frontier_coverage_5": -0.008925668522715568,
"rewards/frontier_entropy_batch_reward": -0.3202110558748245,
"signal/accuracy_reward/centered_abs_mean": 0.18296983540058137,
"signal/accuracy_reward/group_std_mean": 0.24478627741336823,
"signal/accuracy_reward/group_zero_std_frac": 0.29722222983837127,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9449079632759094,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09148491770029069,
"signal/advantage_abs_mean": 0.7456163048744202,
"signal/advantage_pre_scale_abs_mean": 0.11277087926864623,
"signal/advantage_pre_scale_std": 0.1716623306274414,
"signal/advantage_std": 0.9834415912628174,
"signal/brier_reward/centered_abs_mean": 0.19162435531616212,
"signal/brier_reward/group_std_mean": 0.23700920343399048,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19831233322620392,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.019162436202168464,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.037688417732715605,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06137025505304337,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03864099867641926,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037688419222831728,
"signal/format_reward/centered_abs_mean": 0.02428927905857563,
"signal/format_reward/group_std_mean": 0.04575216062366962,
"signal/format_reward/group_zero_std_frac": 0.8083333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1223247617483139,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012144639529287815,
"signal/frontier_coverage_0/centered_abs_mean": 0.20233065783977508,
"signal/frontier_coverage_0/group_std_mean": 0.2709280252456665,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.029919801652431487,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028933283407241105,
"signal/frontier_coverage_1/centered_abs_mean": 0.20233065783977508,
"signal/frontier_coverage_1/group_std_mean": 0.2709280252456665,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.029919801652431487,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028933283407241105,
"signal/frontier_coverage_10/centered_abs_mean": 0.20233065783977508,
"signal/frontier_coverage_10/group_std_mean": 0.2709280252456665,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.029919801652431487,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028933283407241105,
"signal/frontier_coverage_15/centered_abs_mean": 0.20233065783977508,
"signal/frontier_coverage_15/group_std_mean": 0.2709280252456665,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.029919801652431487,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028933283407241105,
"signal/frontier_coverage_20/centered_abs_mean": 0.20233065783977508,
"signal/frontier_coverage_20/group_std_mean": 0.2709280252456665,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.029919801652431487,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028933283407241105,
"signal/frontier_coverage_25/centered_abs_mean": 0.20233065783977508,
"signal/frontier_coverage_25/group_std_mean": 0.2709280252456665,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029919801652431487,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028933283407241105,
"signal/frontier_coverage_5/centered_abs_mean": 0.20233065783977508,
"signal/frontier_coverage_5/group_std_mean": 0.2709280252456665,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.029919801652431487,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028933283407241105,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.37160670161247256,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.43908803462982177,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.38857935070991517,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03716067224740982,
"step": 45
},
{
"calibration/aurc": 0.355543148661393,
"calibration/batch_distribution_entropy": 0.9560598149241851,
"calibration/buffer_distribution_entropy": 0.8409339110002637,
"calibration/confidence_entropy": 0.46758599740547824,
"calibration/coverage@0%": 0.0110337334407132,
"calibration/coverage@1%": 0.0110337334407132,
"calibration/coverage@10%": 0.011561437926201327,
"calibration/coverage@15%": 0.03893921379876839,
"calibration/coverage@20%": 0.09381513646509912,
"calibration/coverage@25%": 0.2094168255802026,
"calibration/coverage@30%": 0.4347323189967276,
"calibration/coverage@5%": 0.0110337334407132,
"calibration/ece": 0.1621294484579252,
"calibration/mean_confidence": 0.5582715356184847,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009895833333333348,
"completions/max_length": 3273.0,
"completions/max_terminated_length": 3273.0,
"completions/mean_length": 710.214501953125,
"completions/mean_terminated_length": 717.2854614257812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 228.6,
"epoch": 0.11999850001874976,
"grad_norm": 0.003402084344998002,
"learning_rate": 4.759036144578314e-06,
"loss": -0.0296,
"num_tokens": 98275375.0,
"reward": 0.9638577103614807,
"reward_std": 0.14062503576278687,
"rewards/accuracy_reward": 0.6588541626930237,
"rewards/brier_reward": 0.7617009282112122,
"rewards/confidence_uniqueness_reward": 0.9369927644729614,
"rewards/format_reward": 0.9899305582046509,
"rewards/frontier_coverage_0": 0.007022621482610703,
"rewards/frontier_coverage_1": 0.007022621482610703,
"rewards/frontier_coverage_10": 0.007022621482610703,
"rewards/frontier_coverage_15": 0.007022621482610703,
"rewards/frontier_coverage_20": 0.007022621482610703,
"rewards/frontier_coverage_25": 0.007022621482610703,
"rewards/frontier_coverage_5": 0.007022621482610703,
"rewards/frontier_entropy_batch_reward": -0.3110700786113739,
"signal/accuracy_reward/centered_abs_mean": 0.1725911468267441,
"signal/accuracy_reward/group_std_mean": 0.22528342604637147,
"signal/accuracy_reward/group_zero_std_frac": 0.3611111223697662,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9741186976432801,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08629557341337205,
"signal/advantage_abs_mean": 0.7553925156593323,
"signal/advantage_pre_scale_abs_mean": 0.10563597530126571,
"signal/advantage_pre_scale_std": 0.16184936761856078,
"signal/advantage_std": 0.9833512544631958,
"signal/brier_reward/centered_abs_mean": 0.18118281662464142,
"signal/brier_reward/group_std_mean": 0.22774460315704345,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20505461990833282,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01811828128993511,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031979148462414744,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05420147180557251,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03622420057654381,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031979148741811516,
"signal/format_reward/centered_abs_mean": 0.017957899160683154,
"signal/format_reward/group_std_mean": 0.037631581723690036,
"signal/format_reward/group_zero_std_frac": 0.830555546283722,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10187934935092927,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008978949580341577,
"signal/frontier_coverage_0/centered_abs_mean": 0.21007080078125,
"signal/frontier_coverage_0/group_std_mean": 0.27531993985176084,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.034024206921458244,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030040125828236343,
"signal/frontier_coverage_1/centered_abs_mean": 0.21007080078125,
"signal/frontier_coverage_1/group_std_mean": 0.27531993985176084,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.034024206921458244,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030040125828236343,
"signal/frontier_coverage_10/centered_abs_mean": 0.21007080078125,
"signal/frontier_coverage_10/group_std_mean": 0.27531993985176084,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.034024206921458244,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030040125828236343,
"signal/frontier_coverage_15/centered_abs_mean": 0.21007080078125,
"signal/frontier_coverage_15/group_std_mean": 0.27531993985176084,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.034024206921458244,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030040125828236343,
"signal/frontier_coverage_20/centered_abs_mean": 0.21007080078125,
"signal/frontier_coverage_20/group_std_mean": 0.27531993985176084,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.034024206921458244,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030040125828236343,
"signal/frontier_coverage_25/centered_abs_mean": 0.21007080078125,
"signal/frontier_coverage_25/group_std_mean": 0.27531993985176084,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.034024206921458244,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030040125828236343,
"signal/frontier_coverage_5/centered_abs_mean": 0.21007080078125,
"signal/frontier_coverage_5/group_std_mean": 0.27531993985176084,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.034024206921458244,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030040125828236343,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36411572694778443,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4315321445465088,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4120087444782257,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03641157373785973,
"step": 50
},
{
"epoch": 0.11999850001874976,
"eval_calibration/aurc": 0.18507757147473192,
"eval_calibration/batch_distribution_entropy": 0.9183594624506147,
"eval_calibration/buffer_distribution_entropy": 0.8576227601929546,
"eval_calibration/confidence_entropy": 0.5061289053377749,
"eval_calibration/coverage@0%": 0.17943548387096775,
"eval_calibration/coverage@1%": 0.17943548387096775,
"eval_calibration/coverage@10%": 0.3020833333333333,
"eval_calibration/coverage@15%": 0.4437163978494623,
"eval_calibration/coverage@20%": 0.6270161290322581,
"eval_calibration/coverage@25%": 0.8429099462365591,
"eval_calibration/coverage@30%": 0.9479166666666666,
"eval_calibration/coverage@5%": 0.17943548387096775,
"eval_calibration/ece": 0.232350307883931,
"eval_calibration/mean_confidence": 0.5651359238441985,
"eval_completions/clipped_ratio": 0.009375000000000003,
"eval_completions/max_length": 2114.1666666666665,
"eval_completions/max_terminated_length": 2114.1666666666665,
"eval_completions/mean_length": 693.6932373046875,
"eval_completions/mean_terminated_length": 700.2240702311198,
"eval_completions/min_length": 72.33333333333333,
"eval_completions/min_terminated_length": 265.6666666666667,
"eval_loss": 0.0,
"eval_num_tokens": 98275375.0,
"eval_reward": 0.8912924925486246,
"eval_reward_std": 0.23265416423479715,
"eval_rewards/accuracy_reward": 0.6527777711550394,
"eval_rewards/brier_reward": 0.7764979799588522,
"eval_rewards/confidence_uniqueness_reward": 0.8910275399684906,
"eval_rewards/format_reward": 0.9930555522441864,
"eval_rewards/frontier_coverage_0": 0.009279087030639252,
"eval_rewards/frontier_coverage_1": 0.009279087030639252,
"eval_rewards/frontier_coverage_10": 0.009279087030639252,
"eval_rewards/frontier_coverage_15": 0.009279087030639252,
"eval_rewards/frontier_coverage_20": 0.009279087030639252,
"eval_rewards/frontier_coverage_25": 0.009279087030639252,
"eval_rewards/frontier_coverage_5": 0.009279087030639252,
"eval_rewards/frontier_entropy_batch_reward": -0.9930555522441864,
"eval_runtime": 173.0155,
"eval_samples_per_second": 5.78,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4415147602558136,
"eval_signal/accuracy_reward/group_std_mean": 0.4768268217643102,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9577702283859253,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2207573801279068,
"eval_signal/advantage_abs_mean": 0.8893506626288096,
"eval_signal/advantage_pre_scale_abs_mean": 0.2068815752863884,
"eval_signal/advantage_pre_scale_std": 0.2305774266521136,
"eval_signal/advantage_std": 0.9863962332407633,
"eval_signal/brier_reward/centered_abs_mean": 0.19041885187228522,
"eval_signal/brier_reward/group_std_mean": 0.2465388998389244,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08267416805028915,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019041885621845722,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.047221081952253975,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07311302361389001,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02038925824066003,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004722108171942334,
"eval_signal/format_reward/centered_abs_mean": 0.013454860852410397,
"eval_signal/format_reward/group_std_mean": 0.03928370991100868,
"eval_signal/format_reward/group_zero_std_frac": 0.7777777910232544,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.028587787101666134,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.006727430426205198,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.24869261930386224,
"eval_signal/frontier_coverage_0/group_std_mean": 0.35074693461259204,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015456531352053085,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0035563044948503375,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.24869261930386224,
"eval_signal/frontier_coverage_1/group_std_mean": 0.35074693461259204,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.015456531352053085,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035563044948503375,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.24869261930386224,
"eval_signal/frontier_coverage_10/group_std_mean": 0.35074693461259204,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015456531352053085,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035563044948503375,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.24869261930386224,
"eval_signal/frontier_coverage_15/group_std_mean": 0.35074693461259204,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015456531352053085,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035563044948503375,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.24869261930386224,
"eval_signal/frontier_coverage_20/group_std_mean": 0.35074693461259204,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015456531352053085,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035563044948503375,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.24869261930386224,
"eval_signal/frontier_coverage_25/group_std_mean": 0.35074693461259204,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015456531352053085,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035563044948503375,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.24869261930386224,
"eval_signal/frontier_coverage_5/group_std_mean": 0.35074693461259204,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.015456531352053085,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035563044948503375,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.013454860852410397,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.03928370991100868,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.7777777910232544,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.005717557234068711,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0013454861667317648,
"eval_steps_per_second": 0.035,
"step": 50
},
{
"calibration/aurc": 0.2572277962003569,
"calibration/batch_distribution_entropy": 0.9547779041044873,
"calibration/buffer_distribution_entropy": 0.8663927585115305,
"calibration/confidence_entropy": 0.5224142559254161,
"calibration/coverage@0%": 0.010124414417387751,
"calibration/coverage@1%": 0.010124414417387751,
"calibration/coverage@10%": 0.02617453887022441,
"calibration/coverage@15%": 0.19295365729541794,
"calibration/coverage@20%": 0.3283553771989877,
"calibration/coverage@25%": 0.5176317108323689,
"calibration/coverage@30%": 0.7306122575305988,
"calibration/coverage@5%": 0.010124414417387751,
"calibration/ece": 0.13155749677237122,
"calibration/mean_confidence": 0.5880504501565289,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009461805555555581,
"completions/max_length": 3255.2,
"completions/max_terminated_length": 3255.2,
"completions/mean_length": 727.8955688476562,
"completions/mean_terminated_length": 734.931005859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 191.4,
"epoch": 0.13199835002062474,
"grad_norm": 0.0026916302740573883,
"learning_rate": 4.60843373493976e-06,
"loss": -0.0219,
"num_tokens": 109741308.0,
"reward": 0.9618936777114868,
"reward_std": 0.13674592971801758,
"rewards/accuracy_reward": 0.6470486044883728,
"rewards/brier_reward": 0.7674099326133728,
"rewards/confidence_uniqueness_reward": 0.9404424667358399,
"rewards/format_reward": 0.9904513955116272,
"rewards/frontier_coverage_0": 0.001309068128466606,
"rewards/frontier_coverage_1": 0.001309068128466606,
"rewards/frontier_coverage_10": 0.001309068128466606,
"rewards/frontier_coverage_15": 0.001309068128466606,
"rewards/frontier_coverage_20": 0.001309068128466606,
"rewards/frontier_coverage_25": 0.001309068128466606,
"rewards/frontier_coverage_5": 0.001309068128466606,
"rewards/frontier_entropy_batch_reward": -0.2777259886264801,
"signal/accuracy_reward/centered_abs_mean": 0.16701388657093047,
"signal/accuracy_reward/group_std_mean": 0.2208912193775177,
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9170358538627624,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08350694328546523,
"signal/advantage_abs_mean": 0.7515268087387085,
"signal/advantage_pre_scale_abs_mean": 0.10258873105049134,
"signal/advantage_pre_scale_std": 0.15661307275295258,
"signal/advantage_std": 0.9833774209022522,
"signal/brier_reward/centered_abs_mean": 0.16898567974567413,
"signal/brier_reward/group_std_mean": 0.21254501342773438,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18657754361629486,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016898567974567413,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02872337996959686,
"signal/confidence_uniqueness_reward/group_std_mean": 0.047750599682331085,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03172791600227356,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028723380994051693,
"signal/format_reward/centered_abs_mean": 0.016373698227107526,
"signal/format_reward/group_std_mean": 0.032994627952575684,
"signal/format_reward/group_zero_std_frac": 0.8583333373069764,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09072078242897988,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008186849113553763,
"signal/frontier_coverage_0/centered_abs_mean": 0.19720979034900665,
"signal/frontier_coverage_0/group_std_mean": 0.2574134826660156,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.031110198795795442,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028200999833643435,
"signal/frontier_coverage_1/centered_abs_mean": 0.19720979034900665,
"signal/frontier_coverage_1/group_std_mean": 0.2574134826660156,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.031110198795795442,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028200999833643435,
"signal/frontier_coverage_10/centered_abs_mean": 0.19720979034900665,
"signal/frontier_coverage_10/group_std_mean": 0.2574134826660156,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.031110198795795442,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028200999833643435,
"signal/frontier_coverage_15/centered_abs_mean": 0.19720979034900665,
"signal/frontier_coverage_15/group_std_mean": 0.2574134826660156,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.031110198795795442,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028200999833643435,
"signal/frontier_coverage_20/centered_abs_mean": 0.19720979034900665,
"signal/frontier_coverage_20/group_std_mean": 0.2574134826660156,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.031110198795795442,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028200999833643435,
"signal/frontier_coverage_25/centered_abs_mean": 0.19720979034900665,
"signal/frontier_coverage_25/group_std_mean": 0.2574134826660156,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.031110198795795442,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028200999833643435,
"signal/frontier_coverage_5/centered_abs_mean": 0.19720979034900665,
"signal/frontier_coverage_5/group_std_mean": 0.2574134826660156,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.031110198795795442,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028200999833643435,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3412093102931976,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41135616302490235,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3766399085521698,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034120932966470716,
"step": 55
},
{
"calibration/aurc": 0.27743387615755427,
"calibration/batch_distribution_entropy": 0.9583523833618678,
"calibration/buffer_distribution_entropy": 0.8804019885998695,
"calibration/confidence_entropy": 0.466344529326517,
"calibration/coverage@0%": 0.013651509291601752,
"calibration/coverage@1%": 0.013651509291601752,
"calibration/coverage@10%": 0.15538102293714737,
"calibration/coverage@15%": 0.37334421356472086,
"calibration/coverage@20%": 0.45284050566706346,
"calibration/coverage@25%": 0.5141313051302937,
"calibration/coverage@30%": 0.6727813439434129,
"calibration/coverage@5%": 0.019410671595266674,
"calibration/ece": 0.1604155313309958,
"calibration/mean_confidence": 0.5627900217322679,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01449652777777779,
"completions/max_length": 3593.0,
"completions/max_terminated_length": 3593.0,
"completions/mean_length": 727.8925415039063,
"completions/mean_terminated_length": 738.712353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 185.8,
"epoch": 0.14399820002249972,
"grad_norm": 0.0024423820432275534,
"learning_rate": 4.457831325301205e-06,
"loss": -0.0326,
"num_tokens": 121223206.0,
"reward": 0.9519212007522583,
"reward_std": 0.14070754647254943,
"rewards/accuracy_reward": 0.6276041746139527,
"rewards/brier_reward": 0.762453269958496,
"rewards/confidence_uniqueness_reward": 0.9355636954307556,
"rewards/format_reward": 0.9853298425674438,
"rewards/frontier_coverage_0": 0.025408835709095003,
"rewards/frontier_coverage_1": 0.025408835709095003,
"rewards/frontier_coverage_10": 0.025408835709095003,
"rewards/frontier_coverage_15": 0.025408835709095003,
"rewards/frontier_coverage_20": 0.025408835709095003,
"rewards/frontier_coverage_25": 0.025408835709095003,
"rewards/frontier_coverage_5": 0.025408835709095003,
"rewards/frontier_entropy_batch_reward": -0.2689092069864273,
"signal/accuracy_reward/centered_abs_mean": 0.1701822906732559,
"signal/accuracy_reward/group_std_mean": 0.2256343573331833,
"signal/accuracy_reward/group_zero_std_frac": 0.3583333373069763,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9568945646286011,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08509114533662795,
"signal/advantage_abs_mean": 0.7413867354393006,
"signal/advantage_pre_scale_abs_mean": 0.10248211473226547,
"signal/advantage_pre_scale_std": 0.16330770254135132,
"signal/advantage_std": 0.9833378672599793,
"signal/brier_reward/centered_abs_mean": 0.18149828016757966,
"signal/brier_reward/group_std_mean": 0.22987159788608552,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20609477162361145,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.018149828910827635,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03701090067625046,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06427684798836708,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04217044934630394,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037010901141911745,
"signal/format_reward/centered_abs_mean": 0.02552625834941864,
"signal/format_reward/group_std_mean": 0.05108080431818962,
"signal/format_reward/group_zero_std_frac": 0.7805555701255799,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.14410489052534103,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01276312917470932,
"signal/frontier_coverage_0/centered_abs_mean": 0.22716614007949829,
"signal/frontier_coverage_0/group_std_mean": 0.2958860158920288,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03683609813451767,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003248475771397352,
"signal/frontier_coverage_1/centered_abs_mean": 0.22716614007949829,
"signal/frontier_coverage_1/group_std_mean": 0.2958860158920288,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03683609813451767,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003248475771397352,
"signal/frontier_coverage_10/centered_abs_mean": 0.22716614007949829,
"signal/frontier_coverage_10/group_std_mean": 0.2958860158920288,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03683609813451767,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003248475771397352,
"signal/frontier_coverage_15/centered_abs_mean": 0.22716614007949829,
"signal/frontier_coverage_15/group_std_mean": 0.2958860158920288,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03683609813451767,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003248475771397352,
"signal/frontier_coverage_20/centered_abs_mean": 0.22716614007949829,
"signal/frontier_coverage_20/group_std_mean": 0.2958860158920288,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03683609813451767,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003248475771397352,
"signal/frontier_coverage_25/centered_abs_mean": 0.22716614007949829,
"signal/frontier_coverage_25/group_std_mean": 0.2958860158920288,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03683609813451767,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003248475771397352,
"signal/frontier_coverage_5/centered_abs_mean": 0.22716614007949829,
"signal/frontier_coverage_5/group_std_mean": 0.2958860158920288,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03683609813451767,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003248475771397352,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3329376816749573,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4049929976463318,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3823388457298279,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03329376950860023,
"step": 60
},
{
"calibration/aurc": 0.24187817445696086,
"calibration/batch_distribution_entropy": 0.9537951149289101,
"calibration/buffer_distribution_entropy": 0.895752842577633,
"calibration/confidence_entropy": 0.5066892649603131,
"calibration/coverage@0%": 0.014859234868555388,
"calibration/coverage@1%": 0.014859234868555388,
"calibration/coverage@10%": 0.1557253084507572,
"calibration/coverage@15%": 0.22767872436187964,
"calibration/coverage@20%": 0.5952973900478373,
"calibration/coverage@25%": 0.6492208361304835,
"calibration/coverage@30%": 0.7082545960594742,
"calibration/coverage@5%": 0.07725923486855539,
"calibration/ece": 0.14400525368708186,
"calibration/mean_confidence": 0.5951536858486229,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011979166666666652,
"completions/max_length": 3382.8,
"completions/max_terminated_length": 3382.8,
"completions/mean_length": 631.187939453125,
"completions/mean_terminated_length": 638.8400756835938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 141.2,
"epoch": 0.1559980500243747,
"grad_norm": 0.0027344543486833572,
"learning_rate": 4.307228915662651e-06,
"loss": -0.0313,
"num_tokens": 131588539.0,
"reward": 0.9630724668502808,
"reward_std": 0.13918745368719102,
"rewards/accuracy_reward": 0.6561631917953491,
"rewards/brier_reward": 0.7739776849746705,
"rewards/confidence_uniqueness_reward": 0.9360240459442138,
"rewards/format_reward": 0.9877604246139526,
"rewards/frontier_coverage_0": 0.011109796725213528,
"rewards/frontier_coverage_1": 0.011109796725213528,
"rewards/frontier_coverage_10": 0.011109796725213528,
"rewards/frontier_coverage_15": 0.011109796725213528,
"rewards/frontier_coverage_20": 0.011109796725213528,
"rewards/frontier_coverage_25": 0.011109796725213528,
"rewards/frontier_coverage_5": 0.011109796725213528,
"rewards/frontier_entropy_batch_reward": -0.31001612544059753,
"signal/accuracy_reward/centered_abs_mean": 0.15845811367034912,
"signal/accuracy_reward/group_std_mean": 0.21250716745853424,
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9355194449424744,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07922905683517456,
"signal/advantage_abs_mean": 0.7518455505371093,
"signal/advantage_pre_scale_abs_mean": 0.10312150418758392,
"signal/advantage_pre_scale_std": 0.1631181061267853,
"signal/advantage_std": 0.9832925438880921,
"signal/brier_reward/centered_abs_mean": 0.17200563251972198,
"signal/brier_reward/group_std_mean": 0.21544553339481354,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20448561310768126,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.017200562357902526,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03373164795339108,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05614056885242462,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04000279903411865,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033731648698449137,
"signal/format_reward/centered_abs_mean": 0.02146809957921505,
"signal/format_reward/group_std_mean": 0.04147007092833519,
"signal/format_reward/group_zero_std_frac": 0.8277777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.12673527002334595,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010734049789607524,
"signal/frontier_coverage_0/centered_abs_mean": 0.18637515604496002,
"signal/frontier_coverage_0/group_std_mean": 0.24511989057064057,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03161940351128578,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026651647873222827,
"signal/frontier_coverage_1/centered_abs_mean": 0.18637515604496002,
"signal/frontier_coverage_1/group_std_mean": 0.24511989057064057,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03161940351128578,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026651647873222827,
"signal/frontier_coverage_10/centered_abs_mean": 0.18637515604496002,
"signal/frontier_coverage_10/group_std_mean": 0.24511989057064057,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03161940351128578,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026651647873222827,
"signal/frontier_coverage_15/centered_abs_mean": 0.18637515604496002,
"signal/frontier_coverage_15/group_std_mean": 0.24511989057064057,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03161940351128578,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026651647873222827,
"signal/frontier_coverage_20/centered_abs_mean": 0.18637515604496002,
"signal/frontier_coverage_20/group_std_mean": 0.24511989057064057,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03161940351128578,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026651647873222827,
"signal/frontier_coverage_25/centered_abs_mean": 0.18637515604496002,
"signal/frontier_coverage_25/group_std_mean": 0.24511989057064057,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03161940351128578,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026651647873222827,
"signal/frontier_coverage_5/centered_abs_mean": 0.18637515604496002,
"signal/frontier_coverage_5/group_std_mean": 0.24511989057064057,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03161940351128578,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026651647873222827,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35859541296958924,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.42603600025177,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4278856158256531,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035859542340040206,
"step": 65
},
{
"calibration/aurc": 0.31177324033790227,
"calibration/batch_distribution_entropy": 0.9377009466084179,
"calibration/buffer_distribution_entropy": 0.9052739502005067,
"calibration/confidence_entropy": 0.4595234811538427,
"calibration/coverage@0%": 0.0125507308012776,
"calibration/coverage@1%": 0.0125507308012776,
"calibration/coverage@10%": 0.025171344836365316,
"calibration/coverage@15%": 0.17121301150303198,
"calibration/coverage@20%": 0.2777535816784706,
"calibration/coverage@25%": 0.31907901969633845,
"calibration/coverage@30%": 0.48218757232791737,
"calibration/coverage@5%": 0.0125507308012776,
"calibration/ece": 0.22044916226110992,
"calibration/mean_confidence": 0.5375950511256701,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0066840277777777905,
"completions/max_length": 3311.8,
"completions/max_terminated_length": 3311.8,
"completions/mean_length": 603.350341796875,
"completions/mean_terminated_length": 607.4075073242187,
"completions/min_length": 0.0,
"completions/min_terminated_length": 143.0,
"epoch": 0.16799790002624967,
"grad_norm": 0.002717731287702918,
"learning_rate": 4.156626506024097e-06,
"loss": -0.0122,
"num_tokens": 141617279.0,
"reward": 0.9516151428222657,
"reward_std": 0.12674596905708313,
"rewards/accuracy_reward": 0.6421875,
"rewards/brier_reward": 0.7418337464332581,
"rewards/confidence_uniqueness_reward": 0.9369692325592041,
"rewards/format_reward": 0.9933159828186036,
"rewards/frontier_coverage_0": 0.0047592608723789455,
"rewards/frontier_coverage_1": 0.0047592608723789455,
"rewards/frontier_coverage_10": 0.0047592608723789455,
"rewards/frontier_coverage_15": 0.0047592608723789455,
"rewards/frontier_coverage_20": 0.0047592608723789455,
"rewards/frontier_coverage_25": 0.0047592608723789455,
"rewards/frontier_coverage_5": 0.0047592608723789455,
"rewards/frontier_entropy_batch_reward": -0.34493361711502074,
"signal/accuracy_reward/centered_abs_mean": 0.16208766996860505,
"signal/accuracy_reward/group_std_mean": 0.2136603981256485,
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9817174792289733,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08104383498430252,
"signal/advantage_abs_mean": 0.7454643368721008,
"signal/advantage_pre_scale_abs_mean": 0.09447728544473648,
"signal/advantage_pre_scale_std": 0.14597638845443725,
"signal/advantage_std": 0.9832653284072876,
"signal/brier_reward/centered_abs_mean": 0.19506115317344666,
"signal/brier_reward/group_std_mean": 0.24160505831241608,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.23666558563709258,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.019506115466356277,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02871289774775505,
"signal/confidence_uniqueness_reward/group_std_mean": 0.045819585025310514,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0351563211530447,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002871289849281311,
"signal/format_reward/centered_abs_mean": 0.01219075545668602,
"signal/format_reward/group_std_mean": 0.026185811311006547,
"signal/format_reward/group_zero_std_frac": 0.8805555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07440270856022835,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00609537772834301,
"signal/frontier_coverage_0/centered_abs_mean": 0.2429557830095291,
"signal/frontier_coverage_0/group_std_mean": 0.31135170757770536,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04189819991588593,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0034742677584290505,
"signal/frontier_coverage_1/centered_abs_mean": 0.2429557830095291,
"signal/frontier_coverage_1/group_std_mean": 0.31135170757770536,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04189819991588593,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034742677584290505,
"signal/frontier_coverage_10/centered_abs_mean": 0.2429557830095291,
"signal/frontier_coverage_10/group_std_mean": 0.31135170757770536,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04189819991588593,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034742677584290505,
"signal/frontier_coverage_15/centered_abs_mean": 0.2429557830095291,
"signal/frontier_coverage_15/group_std_mean": 0.31135170757770536,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04189819991588593,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034742677584290505,
"signal/frontier_coverage_20/centered_abs_mean": 0.2429557830095291,
"signal/frontier_coverage_20/group_std_mean": 0.31135170757770536,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04189819991588593,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034742677584290505,
"signal/frontier_coverage_25/centered_abs_mean": 0.2429557830095291,
"signal/frontier_coverage_25/group_std_mean": 0.31135170757770536,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04189819991588593,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034742677584290505,
"signal/frontier_coverage_5/centered_abs_mean": 0.2429557830095291,
"signal/frontier_coverage_5/group_std_mean": 0.31135170757770536,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04189819991588593,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034742677584290505,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3646996796131134,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4328969597816467,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4460917890071869,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03646996915340424,
"step": 70
},
{
"calibration/aurc": 0.3101440805164673,
"calibration/batch_distribution_entropy": 0.9050061498877229,
"calibration/buffer_distribution_entropy": 0.9116176363911908,
"calibration/confidence_entropy": 0.5082916114231081,
"calibration/coverage@0%": 0.00576069634986306,
"calibration/coverage@1%": 0.00576069634986306,
"calibration/coverage@10%": 0.00576069634986306,
"calibration/coverage@15%": 0.18960560085429345,
"calibration/coverage@20%": 0.283362673405381,
"calibration/coverage@25%": 0.4146926660345467,
"calibration/coverage@30%": 0.41784227233375937,
"calibration/coverage@5%": 0.00576069634986306,
"calibration/ece": 0.197117675594798,
"calibration/mean_confidence": 0.661174791007752,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004861111111111138,
"completions/max_length": 2847.6,
"completions/max_terminated_length": 2847.6,
"completions/mean_length": 612.6124267578125,
"completions/mean_terminated_length": 615.6407348632813,
"completions/min_length": 0.0,
"completions/min_terminated_length": 173.8,
"epoch": 0.17999775002812465,
"grad_norm": 0.0024783292319625616,
"learning_rate": 4.006024096385543e-06,
"loss": -0.0033,
"num_tokens": 151739470.0,
"reward": 0.966460108757019,
"reward_std": 0.13642458617687225,
"rewards/accuracy_reward": 0.6873263835906982,
"rewards/brier_reward": 0.7391559958457947,
"rewards/confidence_uniqueness_reward": 0.9389071345329285,
"rewards/format_reward": 0.9947048544883728,
"rewards/frontier_coverage_0": -0.0546910285949707,
"rewards/frontier_coverage_1": -0.0546910285949707,
"rewards/frontier_coverage_10": -0.0546910285949707,
"rewards/frontier_coverage_15": -0.0546910285949707,
"rewards/frontier_coverage_20": -0.0546910285949707,
"rewards/frontier_coverage_25": -0.0546910285949707,
"rewards/frontier_coverage_5": -0.0546910285949707,
"rewards/frontier_entropy_batch_reward": -0.36887272596359255,
"signal/accuracy_reward/centered_abs_mean": 0.15999349057674409,
"signal/accuracy_reward/group_std_mean": 0.20950167179107665,
"signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9744468212127686,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07999674528837204,
"signal/advantage_abs_mean": 0.7634802103042603,
"signal/advantage_pre_scale_abs_mean": 0.10433225780725479,
"signal/advantage_pre_scale_std": 0.15965070724487304,
"signal/advantage_std": 0.9832550525665283,
"signal/brier_reward/centered_abs_mean": 0.19047823250293733,
"signal/brier_reward/group_std_mean": 0.2352720856666565,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.23357610106468202,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.019047823548316956,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02479529082775116,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04085197448730469,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030193888396024705,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002479529147967696,
"signal/format_reward/centered_abs_mean": 0.009879557183012366,
"signal/format_reward/group_std_mean": 0.022797855362296105,
"signal/format_reward/group_zero_std_frac": 0.8916666746139527,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05883842520415783,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004939778591506183,
"signal/frontier_coverage_0/centered_abs_mean": 0.16593956649303437,
"signal/frontier_coverage_0/group_std_mean": 0.2201917886734009,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02901824899017811,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023729358334094288,
"signal/frontier_coverage_1/centered_abs_mean": 0.16593956649303437,
"signal/frontier_coverage_1/group_std_mean": 0.2201917886734009,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02901824899017811,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023729358334094288,
"signal/frontier_coverage_10/centered_abs_mean": 0.16593956649303437,
"signal/frontier_coverage_10/group_std_mean": 0.2201917886734009,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02901824899017811,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023729358334094288,
"signal/frontier_coverage_15/centered_abs_mean": 0.16593956649303437,
"signal/frontier_coverage_15/group_std_mean": 0.2201917886734009,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02901824899017811,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023729358334094288,
"signal/frontier_coverage_20/centered_abs_mean": 0.16593956649303437,
"signal/frontier_coverage_20/group_std_mean": 0.2201917886734009,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02901824899017811,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023729358334094288,
"signal/frontier_coverage_25/centered_abs_mean": 0.16593956649303437,
"signal/frontier_coverage_25/group_std_mean": 0.2201917886734009,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02901824899017811,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023729358334094288,
"signal/frontier_coverage_5/centered_abs_mean": 0.16593956649303437,
"signal/frontier_coverage_5/group_std_mean": 0.2201917886734009,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02901824899017811,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023729358334094288,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3872749865055084,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.44775003790855405,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4765858590602875,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038727499544620514,
"step": 75
},
{
"calibration/aurc": 0.24882971050558633,
"calibration/batch_distribution_entropy": 0.9163427844868991,
"calibration/buffer_distribution_entropy": 0.9156692618801193,
"calibration/confidence_entropy": 0.5399646911523172,
"calibration/coverage@0%": 0.0041666666666666675,
"calibration/coverage@1%": 0.0041666666666666675,
"calibration/coverage@10%": 0.01832759186351706,
"calibration/coverage@15%": 0.2557291666666667,
"calibration/coverage@20%": 0.40364583333333337,
"calibration/coverage@25%": 0.6344480340606008,
"calibration/coverage@30%": 0.7081002920035939,
"calibration/coverage@5%": 0.0041666666666666675,
"calibration/ece": 0.1941888822566929,
"calibration/mean_confidence": 0.6331814034577334,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00868055555555558,
"completions/max_length": 3604.4,
"completions/max_terminated_length": 3604.4,
"completions/mean_length": 645.2096435546875,
"completions/mean_terminated_length": 650.9556030273437,
"completions/min_length": 0.0,
"completions/min_terminated_length": 125.2,
"epoch": 0.19199760002999963,
"grad_norm": 0.002496064407750964,
"learning_rate": 3.855421686746989e-06,
"loss": -0.0084,
"num_tokens": 162225565.0,
"reward": 0.9505057215690613,
"reward_std": 0.1393027275800705,
"rewards/accuracy_reward": 0.6585069298744202,
"rewards/brier_reward": 0.729674780368805,
"rewards/confidence_uniqueness_reward": 0.9360662698745728,
"rewards/format_reward": 0.9909722208976746,
"rewards/frontier_coverage_0": -0.051046742522157726,
"rewards/frontier_coverage_1": -0.051046742522157726,
"rewards/frontier_coverage_10": -0.051046742522157726,
"rewards/frontier_coverage_15": -0.051046742522157726,
"rewards/frontier_coverage_20": -0.051046742522157726,
"rewards/frontier_coverage_25": -0.051046742522157726,
"rewards/frontier_coverage_5": -0.051046742522157726,
"rewards/frontier_entropy_batch_reward": -0.35698198080062865,
"signal/accuracy_reward/centered_abs_mean": 0.1613064229488373,
"signal/accuracy_reward/group_std_mean": 0.21208280324935913,
"signal/accuracy_reward/group_zero_std_frac": 0.40555556416511535,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9074809789657593,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08065321147441865,
"signal/advantage_abs_mean": 0.7607282400131226,
"signal/advantage_pre_scale_abs_mean": 0.10646351128816604,
"signal/advantage_pre_scale_std": 0.16107785999774932,
"signal/advantage_std": 0.9833507299423218,
"signal/brier_reward/centered_abs_mean": 0.18236831128597258,
"signal/brier_reward/group_std_mean": 0.22466041147708893,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20613384544849395,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.018236831203103064,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026830673590302466,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04254492111504078,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030164846032857896,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002683067345060408,
"signal/format_reward/centered_abs_mean": 0.013585069729015232,
"signal/format_reward/group_std_mean": 0.026397685706615447,
"signal/format_reward/group_zero_std_frac": 0.8833333492279053,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07591437287628651,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006792534864507616,
"signal/frontier_coverage_0/centered_abs_mean": 0.16189261376857758,
"signal/frontier_coverage_0/group_std_mean": 0.21290515959262848,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02617349661886692,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023150643799453976,
"signal/frontier_coverage_1/centered_abs_mean": 0.16189261376857758,
"signal/frontier_coverage_1/group_std_mean": 0.21290515959262848,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02617349661886692,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023150643799453976,
"signal/frontier_coverage_10/centered_abs_mean": 0.16189261376857758,
"signal/frontier_coverage_10/group_std_mean": 0.21290515959262848,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02617349661886692,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023150643799453976,
"signal/frontier_coverage_15/centered_abs_mean": 0.16189261376857758,
"signal/frontier_coverage_15/group_std_mean": 0.21290515959262848,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02617349661886692,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023150643799453976,
"signal/frontier_coverage_20/centered_abs_mean": 0.16189261376857758,
"signal/frontier_coverage_20/group_std_mean": 0.21290515959262848,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02617349661886692,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023150643799453976,
"signal/frontier_coverage_25/centered_abs_mean": 0.16189261376857758,
"signal/frontier_coverage_25/group_std_mean": 0.21290515959262848,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02617349661886692,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023150643799453976,
"signal/frontier_coverage_5/centered_abs_mean": 0.16189261376857758,
"signal/frontier_coverage_5/group_std_mean": 0.21290515959262848,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02617349661886692,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023150643799453976,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.37562611103057864,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4406170785427094,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4253277540206909,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03756261095404625,
"step": 80
},
{
"calibration/aurc": 0.3014076575162675,
"calibration/batch_distribution_entropy": 0.9125055866858618,
"calibration/buffer_distribution_entropy": 0.9188331792178456,
"calibration/confidence_entropy": 0.5207189815765981,
"calibration/coverage@0%": 0.00841005981688481,
"calibration/coverage@1%": 0.00841005981688481,
"calibration/coverage@10%": 0.00998486296649111,
"calibration/coverage@15%": 0.014755210586203521,
"calibration/coverage@20%": 0.032083614827450556,
"calibration/coverage@25%": 0.3209187626504969,
"calibration/coverage@30%": 0.607871104717331,
"calibration/coverage@5%": 0.00841005981688481,
"calibration/ece": 0.16054550562218886,
"calibration/mean_confidence": 0.6530601834127058,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006944444444444442,
"completions/max_length": 3695.8,
"completions/max_terminated_length": 3695.8,
"completions/mean_length": 648.8045166015625,
"completions/mean_terminated_length": 653.3523803710938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 149.6,
"epoch": 0.2039974500318746,
"grad_norm": 0.002358554396778345,
"learning_rate": 3.7048192771084342e-06,
"loss": -0.0216,
"num_tokens": 172786993.0,
"reward": 0.9661161661148071,
"reward_std": 0.13587609827518463,
"rewards/accuracy_reward": 0.6900173425674438,
"rewards/brier_reward": 0.7510282516479492,
"rewards/confidence_uniqueness_reward": 0.9363226532936096,
"rewards/format_reward": 0.992881965637207,
"rewards/frontier_coverage_0": -0.04805287569761276,
"rewards/frontier_coverage_1": -0.04805287569761276,
"rewards/frontier_coverage_10": -0.04805287569761276,
"rewards/frontier_coverage_15": -0.04805287569761276,
"rewards/frontier_coverage_20": -0.04805287569761276,
"rewards/frontier_coverage_25": -0.04805287569761276,
"rewards/frontier_coverage_5": -0.04805287569761276,
"rewards/frontier_entropy_batch_reward": -0.39258493185043336,
"signal/accuracy_reward/centered_abs_mean": 0.1508843332529068,
"signal/accuracy_reward/group_std_mean": 0.20189307630062103,
"signal/accuracy_reward/group_zero_std_frac": 0.4166666746139526,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9263910770416259,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0754421666264534,
"signal/advantage_abs_mean": 0.7528648376464844,
"signal/advantage_pre_scale_abs_mean": 0.10168863832950592,
"signal/advantage_pre_scale_std": 0.1590551733970642,
"signal/advantage_std": 0.9832520723342896,
"signal/brier_reward/centered_abs_mean": 0.17436771094799042,
"signal/brier_reward/group_std_mean": 0.2172168791294098,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21471179723739625,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.017436770349740983,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02653498910367489,
"signal/confidence_uniqueness_reward/group_std_mean": 0.045722561329603194,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.032590895891189575,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026534989941865207,
"signal/format_reward/centered_abs_mean": 0.013118489645421505,
"signal/format_reward/group_std_mean": 0.029562078043818475,
"signal/format_reward/group_zero_std_frac": 0.8611111164093017,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08024730533361435,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0065592448227107525,
"signal/frontier_coverage_0/centered_abs_mean": 0.14816038608551024,
"signal/frontier_coverage_0/group_std_mean": 0.19712282717227936,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.026096120849251746,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002118693618103862,
"signal/frontier_coverage_1/centered_abs_mean": 0.14816038608551024,
"signal/frontier_coverage_1/group_std_mean": 0.19712282717227936,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.026096120849251746,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002118693618103862,
"signal/frontier_coverage_10/centered_abs_mean": 0.14816038608551024,
"signal/frontier_coverage_10/group_std_mean": 0.19712282717227936,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.026096120849251746,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002118693618103862,
"signal/frontier_coverage_15/centered_abs_mean": 0.14816038608551024,
"signal/frontier_coverage_15/group_std_mean": 0.19712282717227936,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026096120849251746,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002118693618103862,
"signal/frontier_coverage_20/centered_abs_mean": 0.14816038608551024,
"signal/frontier_coverage_20/group_std_mean": 0.19712282717227936,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.026096120849251746,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002118693618103862,
"signal/frontier_coverage_25/centered_abs_mean": 0.14816038608551024,
"signal/frontier_coverage_25/group_std_mean": 0.19712282717227936,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.026096120849251746,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002118693618103862,
"signal/frontier_coverage_5/centered_abs_mean": 0.14816038608551024,
"signal/frontier_coverage_5/group_std_mean": 0.19712282717227936,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.026096120849251746,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002118693618103862,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3885017096996307,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.44662662744522097,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4786907732486725,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03885017111897469,
"step": 85
},
{
"calibration/aurc": 0.2280682511148271,
"calibration/batch_distribution_entropy": 0.9191445965224319,
"calibration/buffer_distribution_entropy": 0.9203928575702902,
"calibration/confidence_entropy": 0.5090574346056453,
"calibration/coverage@0%": 0.00573603781882146,
"calibration/coverage@1%": 0.00573603781882146,
"calibration/coverage@10%": 0.055215204485488126,
"calibration/coverage@15%": 0.3963610378188215,
"calibration/coverage@20%": 0.5604235378188214,
"calibration/coverage@25%": 0.6255277044854881,
"calibration/coverage@30%": 0.7142562664907651,
"calibration/coverage@5%": 0.01667353781882146,
"calibration/ece": 0.1746891027457266,
"calibration/mean_confidence": 0.652321831845221,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00512152777777779,
"completions/max_length": 3058.2,
"completions/max_terminated_length": 3058.2,
"completions/mean_length": 614.4275146484375,
"completions/mean_terminated_length": 617.5863159179687,
"completions/min_length": 0.0,
"completions/min_terminated_length": 145.4,
"epoch": 0.2159973000337496,
"grad_norm": 0.002601947635412216,
"learning_rate": 3.5542168674698798e-06,
"loss": -0.0059,
"num_tokens": 182933870.0,
"reward": 0.9689822554588318,
"reward_std": 0.13325872272253036,
"rewards/accuracy_reward": 0.684375,
"rewards/brier_reward": 0.7553081393241883,
"rewards/confidence_uniqueness_reward": 0.9397584915161132,
"rewards/format_reward": 0.9948784708976746,
"rewards/frontier_coverage_0": -0.039187131077051164,
"rewards/frontier_coverage_1": -0.039187131077051164,
"rewards/frontier_coverage_10": -0.039187131077051164,
"rewards/frontier_coverage_15": -0.039187131077051164,
"rewards/frontier_coverage_20": -0.039187131077051164,
"rewards/frontier_coverage_25": -0.039187131077051164,
"rewards/frontier_coverage_5": -0.039187131077051164,
"rewards/frontier_entropy_batch_reward": -0.3622850239276886,
"signal/accuracy_reward/centered_abs_mean": 0.1550998270511627,
"signal/accuracy_reward/group_std_mean": 0.20906379520893098,
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9399829387664795,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07754991352558135,
"signal/advantage_abs_mean": 0.7509470582008362,
"signal/advantage_pre_scale_abs_mean": 0.10027577131986617,
"signal/advantage_pre_scale_std": 0.15472148954868317,
"signal/advantage_std": 0.9832675933837891,
"signal/brier_reward/centered_abs_mean": 0.18083776235580445,
"signal/brier_reward/group_std_mean": 0.22394680380821227,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21995324194431304,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.018083777278661728,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024045027419924737,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03864929303526878,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029247282445430754,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002404502872377634,
"signal/format_reward/centered_abs_mean": 0.009467230830341577,
"signal/format_reward/group_std_mean": 0.02073230631649494,
"signal/format_reward/group_zero_std_frac": 0.9055555701255799,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05722929909825325,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004733615415170788,
"signal/frontier_coverage_0/centered_abs_mean": 0.16228995025157927,
"signal/frontier_coverage_0/group_std_mean": 0.21590131521224976,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02817150242626667,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023207463324069976,
"signal/frontier_coverage_1/centered_abs_mean": 0.16228995025157927,
"signal/frontier_coverage_1/group_std_mean": 0.21590131521224976,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02817150242626667,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023207463324069976,
"signal/frontier_coverage_10/centered_abs_mean": 0.16228995025157927,
"signal/frontier_coverage_10/group_std_mean": 0.21590131521224976,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02817150242626667,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023207463324069976,
"signal/frontier_coverage_15/centered_abs_mean": 0.16228995025157927,
"signal/frontier_coverage_15/group_std_mean": 0.21590131521224976,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02817150242626667,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023207463324069976,
"signal/frontier_coverage_20/centered_abs_mean": 0.16228995025157927,
"signal/frontier_coverage_20/group_std_mean": 0.21590131521224976,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02817150242626667,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023207463324069976,
"signal/frontier_coverage_25/centered_abs_mean": 0.16228995025157927,
"signal/frontier_coverage_25/group_std_mean": 0.21590131521224976,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02817150242626667,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023207463324069976,
"signal/frontier_coverage_5/centered_abs_mean": 0.16228995025157927,
"signal/frontier_coverage_5/group_std_mean": 0.21590131521224976,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02817150242626667,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023207463324069976,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38060142397880553,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.44278682470321656,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.46376983523368837,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03806014358997345,
"step": 90
},
{
"calibration/aurc": 0.2656779618361436,
"calibration/batch_distribution_entropy": 0.921673087913151,
"calibration/buffer_distribution_entropy": 0.9221058711743622,
"calibration/confidence_entropy": 0.5219745548320699,
"calibration/coverage@0%": 0.003655373839760502,
"calibration/coverage@1%": 0.003655373839760502,
"calibration/coverage@10%": 0.10779749862161041,
"calibration/coverage@15%": 0.37434690961637485,
"calibration/coverage@20%": 0.4020928738397605,
"calibration/coverage@25%": 0.5248110125353711,
"calibration/coverage@30%": 0.6770166637048378,
"calibration/coverage@5%": 0.003655373839760502,
"calibration/ece": 0.1682618991312315,
"calibration/mean_confidence": 0.6443340086406893,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003993055555555558,
"completions/max_length": 3106.8,
"completions/max_terminated_length": 3106.8,
"completions/mean_length": 624.9882690429688,
"completions/mean_terminated_length": 627.4920532226563,
"completions/min_length": 0.0,
"completions/min_terminated_length": 133.0,
"epoch": 0.22799715003562457,
"grad_norm": 0.0027543448377400637,
"learning_rate": 3.4036144578313257e-06,
"loss": -0.0072,
"num_tokens": 193225415.0,
"reward": 0.9632153868675232,
"reward_std": 0.12627761960029601,
"rewards/accuracy_reward": 0.6706597208976746,
"rewards/brier_reward": 0.7587080836296082,
"rewards/confidence_uniqueness_reward": 0.9400440454483032,
"rewards/format_reward": 0.9953993082046508,
"rewards/frontier_coverage_0": -0.027446018159389497,
"rewards/frontier_coverage_1": -0.027446018159389497,
"rewards/frontier_coverage_10": -0.027446018159389497,
"rewards/frontier_coverage_15": -0.027446018159389497,
"rewards/frontier_coverage_20": -0.027446018159389497,
"rewards/frontier_coverage_25": -0.027446018159389497,
"rewards/frontier_coverage_5": -0.027446018159389497,
"rewards/frontier_entropy_batch_reward": -0.3694201588630676,
"signal/accuracy_reward/centered_abs_mean": 0.13590494990348817,
"signal/accuracy_reward/group_std_mean": 0.1884896844625473,
"signal/accuracy_reward/group_zero_std_frac": 0.43055555820465086,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8468737006187439,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06795247495174409,
"signal/advantage_abs_mean": 0.752357542514801,
"signal/advantage_pre_scale_abs_mean": 0.09376581460237503,
"signal/advantage_pre_scale_std": 0.1467900037765503,
"signal/advantage_std": 0.983231246471405,
"signal/brier_reward/centered_abs_mean": 0.1709260106086731,
"signal/brier_reward/group_std_mean": 0.21340954005718232,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21400478780269622,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.017092601954936983,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022889725863933563,
"signal/confidence_uniqueness_reward/group_std_mean": 0.037352363020181654,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02867819517850876,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022889725398272274,
"signal/format_reward/centered_abs_mean": 0.008599175233393907,
"signal/format_reward/group_std_mean": 0.019867047667503357,
"signal/format_reward/group_zero_std_frac": 0.9055555701255799,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.053667180240154266,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004299587616696954,
"signal/frontier_coverage_0/centered_abs_mean": 0.15505702197551727,
"signal/frontier_coverage_0/group_std_mean": 0.2044772982597351,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.027795213833451272,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022173153702169657,
"signal/frontier_coverage_1/centered_abs_mean": 0.15505702197551727,
"signal/frontier_coverage_1/group_std_mean": 0.2044772982597351,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.027795213833451272,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022173153702169657,
"signal/frontier_coverage_10/centered_abs_mean": 0.15505702197551727,
"signal/frontier_coverage_10/group_std_mean": 0.2044772982597351,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.027795213833451272,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022173153702169657,
"signal/frontier_coverage_15/centered_abs_mean": 0.15505702197551727,
"signal/frontier_coverage_15/group_std_mean": 0.2044772982597351,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.027795213833451272,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022173153702169657,
"signal/frontier_coverage_20/centered_abs_mean": 0.15505702197551727,
"signal/frontier_coverage_20/group_std_mean": 0.2044772982597351,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027795213833451272,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022173153702169657,
"signal/frontier_coverage_25/centered_abs_mean": 0.15505702197551727,
"signal/frontier_coverage_25/group_std_mean": 0.2044772982597351,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027795213833451272,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022173153702169657,
"signal/frontier_coverage_5/centered_abs_mean": 0.15505702197551727,
"signal/frontier_coverage_5/group_std_mean": 0.2044772982597351,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.027795213833451272,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022173153702169657,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38333263993263245,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4431163430213928,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.48075162172317504,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038333263248205185,
"step": 95
},
{
"calibration/aurc": 0.20759311463380917,
"calibration/batch_distribution_entropy": 0.9477314132460055,
"calibration/buffer_distribution_entropy": 0.9242834024494486,
"calibration/confidence_entropy": 0.5270175050710435,
"calibration/coverage@0%": 0.009675166218401008,
"calibration/coverage@1%": 0.009675166218401008,
"calibration/coverage@10%": 0.0938530992208724,
"calibration/coverage@15%": 0.26268486014223075,
"calibration/coverage@20%": 0.616002402139018,
"calibration/coverage@25%": 0.7508021390374331,
"calibration/coverage@30%": 0.9032085561497327,
"calibration/coverage@5%": 0.009675166218401008,
"calibration/ece": 0.1630102667152334,
"calibration/mean_confidence": 0.6048623992059596,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.025954861111111116,
"completions/max_length": 3420.6,
"completions/max_terminated_length": 3420.6,
"completions/mean_length": 632.4453002929688,
"completions/mean_terminated_length": 649.3080322265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 167.6,
"epoch": 0.23999700003749952,
"grad_norm": 0.002470463514328003,
"learning_rate": 3.2530120481927713e-06,
"loss": -0.0635,
"num_tokens": 203610257.0,
"reward": 0.9558441638946533,
"reward_std": 0.1599712163209915,
"rewards/accuracy_reward": 0.6660590291023254,
"rewards/brier_reward": 0.7662190675735474,
"rewards/confidence_uniqueness_reward": 0.9209245562553405,
"rewards/format_reward": 0.9719617962837219,
"rewards/frontier_coverage_0": -0.00996593926101923,
"rewards/frontier_coverage_1": -0.00996593926101923,
"rewards/frontier_coverage_10": -0.00996593926101923,
"rewards/frontier_coverage_15": -0.00996593926101923,
"rewards/frontier_coverage_20": -0.00996593926101923,
"rewards/frontier_coverage_25": -0.00996593926101923,
"rewards/frontier_coverage_5": -0.00996593926101923,
"rewards/frontier_entropy_batch_reward": -0.3088305056095123,
"signal/accuracy_reward/centered_abs_mean": 0.1613226979970932,
"signal/accuracy_reward/group_std_mean": 0.2118624597787857,
"signal/accuracy_reward/group_zero_std_frac": 0.397222226858139,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9061164379119873,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0806613489985466,
"signal/advantage_abs_mean": 0.7433346390724183,
"signal/advantage_pre_scale_abs_mean": 0.11672266870737076,
"signal/advantage_pre_scale_std": 0.19095246195793153,
"signal/advantage_std": 0.9833595633506775,
"signal/brier_reward/centered_abs_mean": 0.17435405254364014,
"signal/brier_reward/group_std_mean": 0.22024931907653808,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19577785432338715,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.017435405775904654,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05343219414353371,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0928901955485344,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05993582606315613,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005343219451606274,
"signal/format_reward/centered_abs_mean": 0.04528537318110466,
"signal/format_reward/group_std_mean": 0.08404082655906678,
"signal/format_reward/group_zero_std_frac": 0.6611111044883728,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.25386848151683805,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.02264268659055233,
"signal/frontier_coverage_0/centered_abs_mean": 0.16983620524406434,
"signal/frontier_coverage_0/group_std_mean": 0.22350181639194489,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02728012129664421,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024286577478051185,
"signal/frontier_coverage_1/centered_abs_mean": 0.16983620524406434,
"signal/frontier_coverage_1/group_std_mean": 0.22350181639194489,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02728012129664421,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024286577478051185,
"signal/frontier_coverage_10/centered_abs_mean": 0.16983620524406434,
"signal/frontier_coverage_10/group_std_mean": 0.22350181639194489,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02728012129664421,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024286577478051185,
"signal/frontier_coverage_15/centered_abs_mean": 0.16983620524406434,
"signal/frontier_coverage_15/group_std_mean": 0.22350181639194489,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02728012129664421,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024286577478051185,
"signal/frontier_coverage_20/centered_abs_mean": 0.16983620524406434,
"signal/frontier_coverage_20/group_std_mean": 0.22350181639194489,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02728012129664421,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024286577478051185,
"signal/frontier_coverage_25/centered_abs_mean": 0.16983620524406434,
"signal/frontier_coverage_25/group_std_mean": 0.22350181639194489,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02728012129664421,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024286577478051185,
"signal/frontier_coverage_5/centered_abs_mean": 0.16983620524406434,
"signal/frontier_coverage_5/group_std_mean": 0.22350181639194489,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02728012129664421,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024286577478051185,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3497317969799042,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4170763075351715,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.39279434084892273,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034973180294036864,
"step": 100
},
{
"epoch": 0.23999700003749952,
"eval_calibration/aurc": 0.163130081233984,
"eval_calibration/batch_distribution_entropy": 0.9014365191445948,
"eval_calibration/buffer_distribution_entropy": 0.9264574248539187,
"eval_calibration/confidence_entropy": 0.5035177426390464,
"eval_calibration/coverage@0%": 0.15947580645161288,
"eval_calibration/coverage@1%": 0.15947580645161288,
"eval_calibration/coverage@10%": 0.3776993727598566,
"eval_calibration/coverage@15%": 0.4990255376344086,
"eval_calibration/coverage@20%": 0.717909946236559,
"eval_calibration/coverage@25%": 0.8870967741935484,
"eval_calibration/coverage@30%": 0.9946236559139785,
"eval_calibration/coverage@5%": 0.24801747311827957,
"eval_calibration/ece": 0.272578609146035,
"eval_calibration/mean_confidence": 0.6127355205524417,
"eval_completions/clipped_ratio": 0.024131944444444442,
"eval_completions/max_length": 2405.1666666666665,
"eval_completions/max_terminated_length": 2405.1666666666665,
"eval_completions/mean_length": 635.5255432128906,
"eval_completions/mean_terminated_length": 651.1894124348959,
"eval_completions/min_length": 0.0,
"eval_completions/min_terminated_length": 199.0,
"eval_loss": 0.0,
"eval_num_tokens": 203610257.0,
"eval_reward": 0.8911056915918986,
"eval_reward_std": 0.2602160597840945,
"eval_rewards/accuracy_reward": 0.6770833333333334,
"eval_rewards/brier_reward": 0.7757821977138519,
"eval_rewards/confidence_uniqueness_reward": 0.8653644323348999,
"eval_rewards/format_reward": 0.9722222089767456,
"eval_rewards/frontier_coverage_0": -0.004391265024120609,
"eval_rewards/frontier_coverage_1": -0.004391265024120609,
"eval_rewards/frontier_coverage_10": -0.004391265024120609,
"eval_rewards/frontier_coverage_15": -0.004391265024120609,
"eval_rewards/frontier_coverage_20": -0.004391265024120609,
"eval_rewards/frontier_coverage_25": -0.004391265024120609,
"eval_rewards/frontier_coverage_5": -0.004391265024120609,
"eval_rewards/frontier_entropy_batch_reward": -0.9722222089767456,
"eval_runtime": 207.7682,
"eval_samples_per_second": 4.813,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4248046825329463,
"eval_signal/accuracy_reward/group_std_mean": 0.4674356331427892,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8317528963088989,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21240234126647314,
"eval_signal/advantage_abs_mean": 0.8401626845200857,
"eval_signal/advantage_pre_scale_abs_mean": 0.21814856926600137,
"eval_signal/advantage_pre_scale_std": 0.2586393654346466,
"eval_signal/advantage_std": 0.9864379862944285,
"eval_signal/brier_reward/centered_abs_mean": 0.21349711219469705,
"eval_signal/brier_reward/group_std_mean": 0.27180638660987216,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0832139253616333,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.021349711654086907,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0751443641881148,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.14167124529679617,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02908085659146309,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0075144364188114805,
"eval_signal/format_reward/centered_abs_mean": 0.05284288184096416,
"eval_signal/format_reward/group_std_mean": 0.1326932366937399,
"eval_signal/format_reward/group_zero_std_frac": 0.3333333407839139,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.10094406145314376,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.02642144092048208,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.21956058591604233,
"eval_signal/frontier_coverage_0/group_std_mean": 0.3250137319167455,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.012314057908952236,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0031397163790340223,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.21956058591604233,
"eval_signal/frontier_coverage_1/group_std_mean": 0.3250137319167455,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.012314057908952236,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031397163790340223,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.21956058591604233,
"eval_signal/frontier_coverage_10/group_std_mean": 0.3250137319167455,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012314057908952236,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031397163790340223,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.21956058591604233,
"eval_signal/frontier_coverage_15/group_std_mean": 0.3250137319167455,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012314057908952236,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031397163790340223,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.21956058591604233,
"eval_signal/frontier_coverage_20/group_std_mean": 0.3250137319167455,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012314057908952236,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031397163790340223,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.21956058591604233,
"eval_signal/frontier_coverage_25/group_std_mean": 0.3250137319167455,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.012314057908952236,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031397163790340223,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.21956058591604233,
"eval_signal/frontier_coverage_5/group_std_mean": 0.3250137319167455,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.012314057908952236,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031397163790340223,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.05284288184096416,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.1326932366937399,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3333333407839139,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02018881356343627,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.005284288238423566,
"eval_steps_per_second": 0.029,
"step": 100
},
{
"calibration/aurc": 0.27758447836015676,
"calibration/batch_distribution_entropy": 0.9414206143630739,
"calibration/buffer_distribution_entropy": 0.927855162724946,
"calibration/confidence_entropy": 0.47734935606033496,
"calibration/coverage@0%": 0.0075463989800637786,
"calibration/coverage@1%": 0.0075463989800637786,
"calibration/coverage@10%": 0.12135855367619637,
"calibration/coverage@15%": 0.24880930719793426,
"calibration/coverage@20%": 0.33652223634634276,
"calibration/coverage@25%": 0.4669609459964935,
"calibration/coverage@30%": 0.6008762480729914,
"calibration/coverage@5%": 0.05174529400768809,
"calibration/ece": 0.13926885015949225,
"calibration/mean_confidence": 0.6254293779927222,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.025,
"completions/max_length": 3690.0,
"completions/max_terminated_length": 3690.0,
"completions/mean_length": 632.2384643554688,
"completions/mean_terminated_length": 648.3413330078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 150.0,
"epoch": 0.2519968500393745,
"grad_norm": 0.0030207051895558834,
"learning_rate": 3.1024096385542172e-06,
"loss": -0.07,
"num_tokens": 213970508.0,
"reward": 0.9623825907707214,
"reward_std": 0.1516391783952713,
"rewards/accuracy_reward": 0.6657986164093017,
"rewards/brier_reward": 0.7901792764663697,
"rewards/confidence_uniqueness_reward": 0.9224253416061401,
"rewards/format_reward": 0.9745659708976746,
"rewards/frontier_coverage_0": 0.02123640524223447,
"rewards/frontier_coverage_1": 0.02123640524223447,
"rewards/frontier_coverage_10": 0.02123640524223447,
"rewards/frontier_coverage_15": 0.02123640524223447,
"rewards/frontier_coverage_20": 0.02123640524223447,
"rewards/frontier_coverage_25": 0.02123640524223447,
"rewards/frontier_coverage_5": 0.02123640524223447,
"rewards/frontier_entropy_batch_reward": -0.3118593841791153,
"signal/accuracy_reward/centered_abs_mean": 0.15443793088197708,
"signal/accuracy_reward/group_std_mean": 0.20904378294944764,
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9191455960273742,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07721896544098854,
"signal/advantage_abs_mean": 0.7209781050682068,
"signal/advantage_pre_scale_abs_mean": 0.10629049986600876,
"signal/advantage_pre_scale_std": 0.1804076611995697,
"signal/advantage_std": 0.9832780361175537,
"signal/brier_reward/centered_abs_mean": 0.15626430809497832,
"signal/brier_reward/group_std_mean": 0.20239726901054383,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18771646320819854,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015626430884003638,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.052898465842008593,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09158898591995239,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06356689184904099,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005289846519008279,
"signal/format_reward/centered_abs_mean": 0.04350586049258709,
"signal/format_reward/group_std_mean": 0.08119002729654312,
"signal/format_reward/group_zero_std_frac": 0.6722222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2607091456651688,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.021752930246293545,
"signal/frontier_coverage_0/centered_abs_mean": 0.17601246535778045,
"signal/frontier_coverage_0/group_std_mean": 0.23166741728782653,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030123594403266906,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025169781874865294,
"signal/frontier_coverage_1/centered_abs_mean": 0.17601246535778045,
"signal/frontier_coverage_1/group_std_mean": 0.23166741728782653,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030123594403266906,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025169781874865294,
"signal/frontier_coverage_10/centered_abs_mean": 0.17601246535778045,
"signal/frontier_coverage_10/group_std_mean": 0.23166741728782653,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.030123594403266906,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025169781874865294,
"signal/frontier_coverage_15/centered_abs_mean": 0.17601246535778045,
"signal/frontier_coverage_15/group_std_mean": 0.23166741728782653,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030123594403266906,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025169781874865294,
"signal/frontier_coverage_20/centered_abs_mean": 0.17601246535778045,
"signal/frontier_coverage_20/group_std_mean": 0.23166741728782653,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030123594403266906,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025169781874865294,
"signal/frontier_coverage_25/centered_abs_mean": 0.17601246535778045,
"signal/frontier_coverage_25/group_std_mean": 0.23166741728782653,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030123594403266906,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025169781874865294,
"signal/frontier_coverage_5/centered_abs_mean": 0.17601246535778045,
"signal/frontier_coverage_5/group_std_mean": 0.23166741728782653,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030123594403266906,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025169781874865294,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.334915554523468,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4050456404685974,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.40600050091743467,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033491555601358414,
"step": 105
},
{
"calibration/aurc": 0.1623917508484302,
"calibration/batch_distribution_entropy": 0.9382471960574534,
"calibration/buffer_distribution_entropy": 0.9295560400990392,
"calibration/confidence_entropy": 0.48187896497077476,
"calibration/coverage@0%": 0.029851500799161275,
"calibration/coverage@1%": 0.029851500799161275,
"calibration/coverage@10%": 0.4238398178142801,
"calibration/coverage@15%": 0.5081844529158787,
"calibration/coverage@20%": 0.6088912176070422,
"calibration/coverage@25%": 0.8298052000748772,
"calibration/coverage@30%": 0.9122503108164111,
"calibration/coverage@5%": 0.22448740089895208,
"calibration/ece": 0.1263346756856248,
"calibration/mean_confidence": 0.6134829214978298,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010416666666666652,
"completions/max_length": 3428.0,
"completions/max_terminated_length": 3428.0,
"completions/mean_length": 629.52822265625,
"completions/mean_terminated_length": 636.1643432617187,
"completions/min_length": 0.0,
"completions/min_terminated_length": 141.8,
"epoch": 0.2639967000412495,
"grad_norm": 0.003066908335313201,
"learning_rate": 2.9518072289156627e-06,
"loss": -0.0232,
"num_tokens": 224331121.0,
"reward": 0.9897796273231506,
"reward_std": 0.12779354751110078,
"rewards/accuracy_reward": 0.7029513835906982,
"rewards/brier_reward": 0.7993221998214721,
"rewards/confidence_uniqueness_reward": 0.9374632716178894,
"rewards/format_reward": 0.9893229126930236,
"rewards/frontier_coverage_0": 0.0023610764765180647,
"rewards/frontier_coverage_1": 0.0023610764765180647,
"rewards/frontier_coverage_10": 0.0023610764765180647,
"rewards/frontier_coverage_15": 0.0023610764765180647,
"rewards/frontier_coverage_20": 0.0023610764765180647,
"rewards/frontier_coverage_25": 0.0023610764765180647,
"rewards/frontier_coverage_5": 0.0023610764765180647,
"rewards/frontier_entropy_batch_reward": -0.3027245044708252,
"signal/accuracy_reward/centered_abs_mean": 0.1584743946790695,
"signal/accuracy_reward/group_std_mean": 0.20634441077709198,
"signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0239338517189025,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07923719733953476,
"signal/advantage_abs_mean": 0.7589234232902526,
"signal/advantage_pre_scale_abs_mean": 0.0962674856185913,
"signal/advantage_pre_scale_std": 0.1529387891292572,
"signal/advantage_std": 0.9831875920295715,
"signal/brier_reward/centered_abs_mean": 0.14430948197841645,
"signal/brier_reward/group_std_mean": 0.1849027007818222,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1869402378797531,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014430948719382285,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03034689761698246,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05047857165336609,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03914179354906082,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030346899293363094,
"signal/format_reward/centered_abs_mean": 0.017876519449055196,
"signal/format_reward/group_std_mean": 0.03566114716231823,
"signal/format_reward/group_zero_std_frac": 0.8444444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.11467134803533555,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008938259724527598,
"signal/frontier_coverage_0/centered_abs_mean": 0.17844413220882416,
"signal/frontier_coverage_0/group_std_mean": 0.23779484033584594,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03313328959047794,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002551751025021076,
"signal/frontier_coverage_1/centered_abs_mean": 0.17844413220882416,
"signal/frontier_coverage_1/group_std_mean": 0.23779484033584594,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03313328959047794,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002551751025021076,
"signal/frontier_coverage_10/centered_abs_mean": 0.17844413220882416,
"signal/frontier_coverage_10/group_std_mean": 0.23779484033584594,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03313328959047794,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002551751025021076,
"signal/frontier_coverage_15/centered_abs_mean": 0.17844413220882416,
"signal/frontier_coverage_15/group_std_mean": 0.23779484033584594,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03313328959047794,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002551751025021076,
"signal/frontier_coverage_20/centered_abs_mean": 0.17844413220882416,
"signal/frontier_coverage_20/group_std_mean": 0.23779484033584594,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03313328959047794,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002551751025021076,
"signal/frontier_coverage_25/centered_abs_mean": 0.17844413220882416,
"signal/frontier_coverage_25/group_std_mean": 0.23779484033584594,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03313328959047794,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002551751025021076,
"signal/frontier_coverage_5/centered_abs_mean": 0.17844413220882416,
"signal/frontier_coverage_5/group_std_mean": 0.23779484033584594,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03313328959047794,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002551751025021076,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.336598539352417,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40353216528892516,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4363815426826477,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03365985415875912,
"step": 110
},
{
"calibration/aurc": 0.2785963055836248,
"calibration/batch_distribution_entropy": 0.9652125750310209,
"calibration/buffer_distribution_entropy": 0.9330482648173744,
"calibration/confidence_entropy": 0.49621660057634137,
"calibration/coverage@0%": 0.007910071105482502,
"calibration/coverage@1%": 0.007910071105482502,
"calibration/coverage@10%": 0.029982246120544936,
"calibration/coverage@15%": 0.1566908143448694,
"calibration/coverage@20%": 0.3872066256148847,
"calibration/coverage@25%": 0.582606519819888,
"calibration/coverage@30%": 0.6653970694296458,
"calibration/coverage@5%": 0.007910071105482502,
"calibration/ece": 0.1682723224397847,
"calibration/mean_confidence": 0.5473267088527768,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.016145833333333325,
"completions/max_length": 3463.0,
"completions/max_terminated_length": 3463.0,
"completions/mean_length": 619.2389770507813,
"completions/mean_terminated_length": 629.3917236328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 144.4,
"epoch": 0.27599655004312446,
"grad_norm": 0.002968505723401904,
"learning_rate": 2.8012048192771087e-06,
"loss": -0.0457,
"num_tokens": 234543954.0,
"reward": 0.9655173897743226,
"reward_std": 0.13793158531188965,
"rewards/accuracy_reward": 0.6552951335906982,
"rewards/brier_reward": 0.7801418542861939,
"rewards/confidence_uniqueness_reward": 0.9340383648872376,
"rewards/format_reward": 0.9837673664093017,
"rewards/frontier_coverage_0": 0.02393667958676815,
"rewards/frontier_coverage_1": 0.02393667958676815,
"rewards/frontier_coverage_10": 0.02393667958676815,
"rewards/frontier_coverage_15": 0.02393667958676815,
"rewards/frontier_coverage_20": 0.02393667958676815,
"rewards/frontier_coverage_25": 0.02393667958676815,
"rewards/frontier_coverage_5": 0.02393667958676815,
"rewards/frontier_entropy_batch_reward": -0.27827951312065125,
"signal/accuracy_reward/centered_abs_mean": 0.16129014790058135,
"signal/accuracy_reward/group_std_mean": 0.20588865578174592,
"signal/accuracy_reward/group_zero_std_frac": 0.4388889014720917,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9973422050476074,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08064507395029068,
"signal/advantage_abs_mean": 0.7537703037261962,
"signal/advantage_pre_scale_abs_mean": 0.10260143429040909,
"signal/advantage_pre_scale_std": 0.16498699486255647,
"signal/advantage_std": 0.983244001865387,
"signal/brier_reward/centered_abs_mean": 0.16035984754562377,
"signal/brier_reward/group_std_mean": 0.20447275638580323,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19879674315452575,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01603598427027464,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.037899629771709444,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06762906014919282,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04686418101191521,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037899629678577185,
"signal/format_reward/centered_abs_mean": 0.02791341170668602,
"signal/format_reward/group_std_mean": 0.05625998750329018,
"signal/format_reward/group_zero_std_frac": 0.7555555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.17200126945972444,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01395670585334301,
"signal/frontier_coverage_0/centered_abs_mean": 0.2007855713367462,
"signal/frontier_coverage_0/group_std_mean": 0.2609905391931534,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035559892654418945,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028712335973978043,
"signal/frontier_coverage_1/centered_abs_mean": 0.2007855713367462,
"signal/frontier_coverage_1/group_std_mean": 0.2609905391931534,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035559892654418945,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028712335973978043,
"signal/frontier_coverage_10/centered_abs_mean": 0.2007855713367462,
"signal/frontier_coverage_10/group_std_mean": 0.2609905391931534,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.035559892654418945,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028712335973978043,
"signal/frontier_coverage_15/centered_abs_mean": 0.2007855713367462,
"signal/frontier_coverage_15/group_std_mean": 0.2609905391931534,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.035559892654418945,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028712335973978043,
"signal/frontier_coverage_20/centered_abs_mean": 0.2007855713367462,
"signal/frontier_coverage_20/group_std_mean": 0.2609905391931534,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.035559892654418945,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028712335973978043,
"signal/frontier_coverage_25/centered_abs_mean": 0.2007855713367462,
"signal/frontier_coverage_25/group_std_mean": 0.2609905391931534,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.035559892654418945,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028712335973978043,
"signal/frontier_coverage_5/centered_abs_mean": 0.2007855713367462,
"signal/frontier_coverage_5/group_std_mean": 0.2609905391931534,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035559892654418945,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028712335973978043,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.335198974609375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4032855689525604,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4162396967411041,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033519898727536204,
"step": 115
},
{
"calibration/aurc": 0.25985110868658695,
"calibration/batch_distribution_entropy": 0.9631548287364671,
"calibration/buffer_distribution_entropy": 0.9368432393792772,
"calibration/confidence_entropy": 0.45159534427369225,
"calibration/coverage@0%": 0.0010471275946903505,
"calibration/coverage@1%": 0.0010471275946903505,
"calibration/coverage@10%": 0.21385096429441958,
"calibration/coverage@15%": 0.39416616560362666,
"calibration/coverage@20%": 0.4899177481296418,
"calibration/coverage@25%": 0.5565116507652067,
"calibration/coverage@30%": 0.6357331500523827,
"calibration/coverage@5%": 0.02151956853957224,
"calibration/ece": 0.16906323787142602,
"calibration/mean_confidence": 0.553027232437221,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009895833333333348,
"completions/max_length": 3375.2,
"completions/max_terminated_length": 3375.2,
"completions/mean_length": 620.9748413085938,
"completions/mean_terminated_length": 627.0546997070312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 152.2,
"epoch": 0.28799640004499943,
"grad_norm": 0.003942957613617182,
"learning_rate": 2.6506024096385547e-06,
"loss": -0.0242,
"num_tokens": 244779440.0,
"reward": 0.9828472375869751,
"reward_std": 0.1266437292098999,
"rewards/accuracy_reward": 0.6784722208976746,
"rewards/brier_reward": 0.7945773005485535,
"rewards/confidence_uniqueness_reward": 0.9401492238044739,
"rewards/format_reward": 0.9900173664093017,
"rewards/frontier_coverage_0": 0.02642001286149025,
"rewards/frontier_coverage_1": 0.02642001286149025,
"rewards/frontier_coverage_10": 0.02642001286149025,
"rewards/frontier_coverage_15": 0.02642001286149025,
"rewards/frontier_coverage_20": 0.02642001286149025,
"rewards/frontier_coverage_25": 0.02642001286149025,
"rewards/frontier_coverage_5": 0.02642001286149025,
"rewards/frontier_entropy_batch_reward": -0.2751484811306,
"signal/accuracy_reward/centered_abs_mean": 0.15174696147441863,
"signal/accuracy_reward/group_std_mean": 0.2080085426568985,
"signal/accuracy_reward/group_zero_std_frac": 0.37500000596046446,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9443390369415283,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07587348073720931,
"signal/advantage_abs_mean": 0.7379367828369141,
"signal/advantage_pre_scale_abs_mean": 0.0910405844449997,
"signal/advantage_pre_scale_std": 0.14757494032382965,
"signal/advantage_std": 0.9832143902778625,
"signal/brier_reward/centered_abs_mean": 0.16116996705532075,
"signal/brier_reward/group_std_mean": 0.20735826790332795,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20363759398460388,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016116996854543687,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03047032840549946,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05480174720287323,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03763532117009163,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003047033119946718,
"signal/format_reward/centered_abs_mean": 0.01847330704331398,
"signal/format_reward/group_std_mean": 0.040765970945358276,
"signal/format_reward/group_zero_std_frac": 0.8138888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.11124600917100906,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00923665352165699,
"signal/frontier_coverage_0/centered_abs_mean": 0.20754149556159973,
"signal/frontier_coverage_0/group_std_mean": 0.2732445240020752,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03756205141544342,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002967843320220709,
"signal/frontier_coverage_1/centered_abs_mean": 0.20754149556159973,
"signal/frontier_coverage_1/group_std_mean": 0.2732445240020752,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03756205141544342,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002967843320220709,
"signal/frontier_coverage_10/centered_abs_mean": 0.20754149556159973,
"signal/frontier_coverage_10/group_std_mean": 0.2732445240020752,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03756205141544342,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002967843320220709,
"signal/frontier_coverage_15/centered_abs_mean": 0.20754149556159973,
"signal/frontier_coverage_15/group_std_mean": 0.2732445240020752,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03756205141544342,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002967843320220709,
"signal/frontier_coverage_20/centered_abs_mean": 0.20754149556159973,
"signal/frontier_coverage_20/group_std_mean": 0.2732445240020752,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03756205141544342,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002967843320220709,
"signal/frontier_coverage_25/centered_abs_mean": 0.20754149556159973,
"signal/frontier_coverage_25/group_std_mean": 0.2732445240020752,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03756205141544342,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002967843320220709,
"signal/frontier_coverage_5/centered_abs_mean": 0.20754149556159973,
"signal/frontier_coverage_5/group_std_mean": 0.2732445240020752,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03756205141544342,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002967843320220709,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3402287781238556,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41207742094993594,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4308716356754303,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0340228796005249,
"step": 120
},
{
"calibration/aurc": 0.1702944249457136,
"calibration/batch_distribution_entropy": 0.9474281075428994,
"calibration/buffer_distribution_entropy": 0.9399565674518694,
"calibration/confidence_entropy": 0.4981072863512715,
"calibration/coverage@0%": 0.10222342219869385,
"calibration/coverage@1%": 0.18714101358250584,
"calibration/coverage@10%": 0.352484518804438,
"calibration/coverage@15%": 0.38542244882575055,
"calibration/coverage@20%": 0.5395064520329215,
"calibration/coverage@25%": 0.7593703528573289,
"calibration/coverage@30%": 0.8751834871815959,
"calibration/coverage@5%": 0.306036520545082,
"calibration/ece": 0.16359838991861636,
"calibration/mean_confidence": 0.5928311777908328,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.005468750000000022,
"completions/max_length": 2949.2,
"completions/max_terminated_length": 2949.2,
"completions/mean_length": 635.7155395507813,
"completions/mean_terminated_length": 639.208154296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 139.6,
"epoch": 0.2999962500468744,
"grad_norm": 0.0034135030582547188,
"learning_rate": 2.5e-06,
"loss": -0.0155,
"num_tokens": 255220547.0,
"reward": 0.9929954290390015,
"reward_std": 0.12329381704330444,
"rewards/accuracy_reward": 0.6967013955116272,
"rewards/brier_reward": 0.8013178825378418,
"rewards/confidence_uniqueness_reward": 0.9443058967590332,
"rewards/format_reward": 0.9944444298744202,
"rewards/frontier_coverage_0": 0.01054713288322091,
"rewards/frontier_coverage_1": 0.01054713288322091,
"rewards/frontier_coverage_10": 0.01054713288322091,
"rewards/frontier_coverage_15": 0.01054713288322091,
"rewards/frontier_coverage_20": 0.01054713288322091,
"rewards/frontier_coverage_25": 0.01054713288322091,
"rewards/frontier_coverage_5": 0.01054713288322091,
"rewards/frontier_entropy_batch_reward": -0.2819568753242493,
"signal/accuracy_reward/centered_abs_mean": 0.15746527910232544,
"signal/accuracy_reward/group_std_mean": 0.2058452069759369,
"signal/accuracy_reward/group_zero_std_frac": 0.4194444537162781,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0113989472389222,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07873263955116272,
"signal/advantage_abs_mean": 0.7568188905715942,
"signal/advantage_pre_scale_abs_mean": 0.0927268460392952,
"signal/advantage_pre_scale_std": 0.14396594166755677,
"signal/advantage_std": 0.9831969380378723,
"signal/brier_reward/centered_abs_mean": 0.14777444005012513,
"signal/brier_reward/group_std_mean": 0.18979325294494628,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18979544341564178,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014777444303035736,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022845935076475143,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03979781419038773,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029593577980995177,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002284593554213643,
"signal/format_reward/centered_abs_mean": 0.0103624127805233,
"signal/format_reward/group_std_mean": 0.024810751900076866,
"signal/format_reward/group_zero_std_frac": 0.8777777791023255,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0675680547952652,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00518120639026165,
"signal/frontier_coverage_0/centered_abs_mean": 0.19016571938991547,
"signal/frontier_coverage_0/group_std_mean": 0.24880056381225585,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0349818117916584,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002719369810074568,
"signal/frontier_coverage_1/centered_abs_mean": 0.19016571938991547,
"signal/frontier_coverage_1/group_std_mean": 0.24880056381225585,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0349818117916584,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002719369810074568,
"signal/frontier_coverage_10/centered_abs_mean": 0.19016571938991547,
"signal/frontier_coverage_10/group_std_mean": 0.24880056381225585,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0349818117916584,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002719369810074568,
"signal/frontier_coverage_15/centered_abs_mean": 0.19016571938991547,
"signal/frontier_coverage_15/group_std_mean": 0.24880056381225585,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0349818117916584,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002719369810074568,
"signal/frontier_coverage_20/centered_abs_mean": 0.19016571938991547,
"signal/frontier_coverage_20/group_std_mean": 0.24880056381225585,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0349818117916584,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002719369810074568,
"signal/frontier_coverage_25/centered_abs_mean": 0.19016571938991547,
"signal/frontier_coverage_25/group_std_mean": 0.24880056381225585,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0349818117916584,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002719369810074568,
"signal/frontier_coverage_5/centered_abs_mean": 0.19016571938991547,
"signal/frontier_coverage_5/group_std_mean": 0.24880056381225585,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0349818117916584,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002719369810074568,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3356120824813843,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4034553825855255,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4321089446544647,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03356120809912681,
"step": 125
},
{
"calibration/aurc": 0.21974228957333825,
"calibration/batch_distribution_entropy": 0.957767837624923,
"calibration/buffer_distribution_entropy": 0.9414282291526312,
"calibration/confidence_entropy": 0.4740419511530939,
"calibration/coverage@0%": 0.005249582744674368,
"calibration/coverage@1%": 0.005249582744674368,
"calibration/coverage@10%": 0.22161801602834014,
"calibration/coverage@15%": 0.2912255509209624,
"calibration/coverage@20%": 0.5404524177537375,
"calibration/coverage@25%": 0.7363437705207907,
"calibration/coverage@30%": 0.8552729630880902,
"calibration/coverage@5%": 0.052246971778616924,
"calibration/ece": 0.13019231823487484,
"calibration/mean_confidence": 0.5658447997080991,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009722222222222233,
"completions/max_length": 3547.8,
"completions/max_terminated_length": 3547.8,
"completions/mean_length": 649.3093017578125,
"completions/mean_terminated_length": 655.8386352539062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 140.2,
"epoch": 0.3119961000487494,
"grad_norm": 0.0037228513974696398,
"learning_rate": 2.349397590361446e-06,
"loss": -0.0148,
"num_tokens": 265825390.0,
"reward": 0.9755982637405396,
"reward_std": 0.13257428556680678,
"rewards/accuracy_reward": 0.6615451335906982,
"rewards/brier_reward": 0.7983024001121521,
"rewards/confidence_uniqueness_reward": 0.9400404095649719,
"rewards/format_reward": 0.9899305462837219,
"rewards/frontier_coverage_0": 0.036238094815053044,
"rewards/frontier_coverage_1": 0.036238094815053044,
"rewards/frontier_coverage_10": 0.036238094815053044,
"rewards/frontier_coverage_15": 0.036238094815053044,
"rewards/frontier_coverage_20": 0.036238094815053044,
"rewards/frontier_coverage_25": 0.036238094815053044,
"rewards/frontier_coverage_5": 0.036238094815053044,
"rewards/frontier_entropy_batch_reward": -0.2760128676891327,
"signal/accuracy_reward/centered_abs_mean": 0.18161349892616271,
"signal/accuracy_reward/group_std_mean": 0.23401132524013518,
"signal/accuracy_reward/group_zero_std_frac": 0.3555555582046509,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.1416666626930236,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09080674946308136,
"signal/advantage_abs_mean": 0.764301085472107,
"signal/advantage_pre_scale_abs_mean": 0.10157442539930343,
"signal/advantage_pre_scale_std": 0.1548332154750824,
"signal/advantage_std": 0.9832197904586792,
"signal/brier_reward/centered_abs_mean": 0.1567206412553787,
"signal/brier_reward/group_std_mean": 0.1994690865278244,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1979391247034073,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01567206475883722,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02809174992144108,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04275588467717171,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03521875329315662,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028091749642044305,
"signal/format_reward/centered_abs_mean": 0.015679253730922937,
"signal/format_reward/group_std_mean": 0.027723340317606926,
"signal/format_reward/group_zero_std_frac": 0.8888888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09744075834751129,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007839626865461469,
"signal/frontier_coverage_0/centered_abs_mean": 0.2045228362083435,
"signal/frontier_coverage_0/group_std_mean": 0.2676228523254395,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0369983471930027,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002924676425755024,
"signal/frontier_coverage_1/centered_abs_mean": 0.2045228362083435,
"signal/frontier_coverage_1/group_std_mean": 0.2676228523254395,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0369983471930027,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002924676425755024,
"signal/frontier_coverage_10/centered_abs_mean": 0.2045228362083435,
"signal/frontier_coverage_10/group_std_mean": 0.2676228523254395,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0369983471930027,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002924676425755024,
"signal/frontier_coverage_15/centered_abs_mean": 0.2045228362083435,
"signal/frontier_coverage_15/group_std_mean": 0.2676228523254395,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0369983471930027,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002924676425755024,
"signal/frontier_coverage_20/centered_abs_mean": 0.2045228362083435,
"signal/frontier_coverage_20/group_std_mean": 0.2676228523254395,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0369983471930027,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002924676425755024,
"signal/frontier_coverage_25/centered_abs_mean": 0.2045228362083435,
"signal/frontier_coverage_25/group_std_mean": 0.2676228523254395,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0369983471930027,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002924676425755024,
"signal/frontier_coverage_5/centered_abs_mean": 0.2045228362083435,
"signal/frontier_coverage_5/group_std_mean": 0.2676228523254395,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0369983471930027,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002924676425755024,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3297392189502716,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39750961065292356,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4163591504096985,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032973920553922655,
"step": 130
},
{
"calibration/aurc": 0.22207504778154225,
"calibration/batch_distribution_entropy": 0.9701411645819956,
"calibration/buffer_distribution_entropy": 0.9441236363940959,
"calibration/confidence_entropy": 0.4802381286994982,
"calibration/coverage@0%": 0.013629843240210753,
"calibration/coverage@1%": 0.013629843240210753,
"calibration/coverage@10%": 0.2837484478954534,
"calibration/coverage@15%": 0.3598894524847338,
"calibration/coverage@20%": 0.5369423730219428,
"calibration/coverage@25%": 0.6182801199086677,
"calibration/coverage@30%": 0.6674144997591183,
"calibration/coverage@5%": 0.2120673432402108,
"calibration/ece": 0.15988758741196968,
"calibration/mean_confidence": 0.5410395064628674,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009027777777777768,
"completions/max_length": 3522.0,
"completions/max_terminated_length": 3522.0,
"completions/mean_length": 615.496728515625,
"completions/mean_terminated_length": 621.16328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 132.8,
"epoch": 0.32399595005062437,
"grad_norm": 0.0039030034095048904,
"learning_rate": 2.1987951807228917e-06,
"loss": -0.0258,
"num_tokens": 276008936.0,
"reward": 0.9833725333213806,
"reward_std": 0.13102127313613893,
"rewards/accuracy_reward": 0.6739583253860474,
"rewards/brier_reward": 0.788122546672821,
"rewards/confidence_uniqueness_reward": 0.9430952072143555,
"rewards/format_reward": 0.9907986164093018,
"rewards/frontier_coverage_0": 0.018168472126126288,
"rewards/frontier_coverage_1": 0.018168472126126288,
"rewards/frontier_coverage_10": 0.018168472126126288,
"rewards/frontier_coverage_15": 0.018168472126126288,
"rewards/frontier_coverage_20": 0.018168472126126288,
"rewards/frontier_coverage_25": 0.018168472126126288,
"rewards/frontier_coverage_5": 0.018168472126126288,
"rewards/frontier_entropy_batch_reward": -0.2394638776779175,
"signal/accuracy_reward/centered_abs_mean": 0.17725694477558135,
"signal/accuracy_reward/group_std_mean": 0.22965039312839508,
"signal/accuracy_reward/group_zero_std_frac": 0.36388889253139495,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0828640937805176,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08862847238779067,
"signal/advantage_abs_mean": 0.7517815709114075,
"signal/advantage_pre_scale_abs_mean": 0.09779231399297714,
"signal/advantage_pre_scale_std": 0.15307309925556184,
"signal/advantage_std": 0.983241617679596,
"signal/brier_reward/centered_abs_mean": 0.15630776584148406,
"signal/brier_reward/group_std_mean": 0.19986412227153777,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1932190716266632,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015630776807665826,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027252191677689552,
"signal/confidence_uniqueness_reward/group_std_mean": 0.046789034456014636,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.034193987399339675,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027252191212028265,
"signal/format_reward/centered_abs_mean": 0.016525607742369174,
"signal/format_reward/group_std_mean": 0.03393084555864334,
"signal/format_reward/group_zero_std_frac": 0.850000011920929,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10422060191631317,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008262803871184587,
"signal/frontier_coverage_0/centered_abs_mean": 0.2169239789247513,
"signal/frontier_coverage_0/group_std_mean": 0.2821938157081604,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03821746855974197,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0031020127702504397,
"signal/frontier_coverage_1/centered_abs_mean": 0.2169239789247513,
"signal/frontier_coverage_1/group_std_mean": 0.2821938157081604,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03821746855974197,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031020127702504397,
"signal/frontier_coverage_10/centered_abs_mean": 0.2169239789247513,
"signal/frontier_coverage_10/group_std_mean": 0.2821938157081604,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03821746855974197,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031020127702504397,
"signal/frontier_coverage_15/centered_abs_mean": 0.2169239789247513,
"signal/frontier_coverage_15/group_std_mean": 0.2821938157081604,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03821746855974197,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031020127702504397,
"signal/frontier_coverage_20/centered_abs_mean": 0.2169239789247513,
"signal/frontier_coverage_20/group_std_mean": 0.2821938157081604,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03821746855974197,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031020127702504397,
"signal/frontier_coverage_25/centered_abs_mean": 0.2169239789247513,
"signal/frontier_coverage_25/group_std_mean": 0.2821938157081604,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03821746855974197,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031020127702504397,
"signal/frontier_coverage_5/centered_abs_mean": 0.2169239789247513,
"signal/frontier_coverage_5/group_std_mean": 0.2821938157081604,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03821746855974197,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031020127702504397,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3079935610294342,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3808625817298889,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3828635513782501,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03079935573041439,
"step": 135
},
{
"calibration/aurc": 0.1337878460272878,
"calibration/batch_distribution_entropy": 0.9491518389855494,
"calibration/buffer_distribution_entropy": 0.9504134002453654,
"calibration/confidence_entropy": 0.470818452840835,
"calibration/coverage@0%": 0.02919666230366492,
"calibration/coverage@1%": 0.02919666230366492,
"calibration/coverage@10%": 0.45605468305013697,
"calibration/coverage@15%": 0.7058141127727731,
"calibration/coverage@20%": 0.789229159831677,
"calibration/coverage@25%": 0.8711048675379001,
"calibration/coverage@30%": 0.9482999031140213,
"calibration/coverage@5%": 0.19594786212914483,
"calibration/ece": 0.11502078156694107,
"calibration/mean_confidence": 0.6127720502496946,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00581597222222221,
"completions/max_length": 3342.4,
"completions/max_terminated_length": 3342.4,
"completions/mean_length": 629.29306640625,
"completions/mean_terminated_length": 632.9800415039062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 163.2,
"epoch": 0.33599580005249935,
"grad_norm": 0.004057453945279121,
"learning_rate": 2.0481927710843377e-06,
"loss": -0.0122,
"num_tokens": 286362616.0,
"reward": 0.9767401456832886,
"reward_std": 0.12093752324581146,
"rewards/accuracy_reward": 0.6546006917953491,
"rewards/brier_reward": 0.808280074596405,
"rewards/confidence_uniqueness_reward": 0.9443035125732422,
"rewards/format_reward": 0.9940104126930237,
"rewards/frontier_coverage_0": 0.045683811977505685,
"rewards/frontier_coverage_1": 0.045683811977505685,
"rewards/frontier_coverage_10": 0.045683811977505685,
"rewards/frontier_coverage_15": 0.045683811977505685,
"rewards/frontier_coverage_20": 0.045683811977505685,
"rewards/frontier_coverage_25": 0.045683811977505685,
"rewards/frontier_coverage_5": 0.045683811977505685,
"rewards/frontier_entropy_batch_reward": -0.27396737039089203,
"signal/accuracy_reward/centered_abs_mean": 0.15417209565639495,
"signal/accuracy_reward/group_std_mean": 0.20770005285739898,
"signal/accuracy_reward/group_zero_std_frac": 0.3944444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9896841287612915,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07708604782819747,
"signal/advantage_abs_mean": 0.7525755763053894,
"signal/advantage_pre_scale_abs_mean": 0.08958911299705505,
"signal/advantage_pre_scale_std": 0.14011250436306,
"signal/advantage_std": 0.9831971049308776,
"signal/brier_reward/centered_abs_mean": 0.13868292272090912,
"signal/brier_reward/group_std_mean": 0.1801248759031296,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17836227416992187,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01386829260736704,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023434021696448325,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04013000652194023,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030173908919095993,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023434022441506386,
"signal/format_reward/centered_abs_mean": 0.011105685587972402,
"signal/format_reward/group_std_mean": 0.025266989693045618,
"signal/format_reward/group_zero_std_frac": 0.8805555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07126235738396644,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005552842793986201,
"signal/frontier_coverage_0/centered_abs_mean": 0.19150737822055816,
"signal/frontier_coverage_0/group_std_mean": 0.25176058411598207,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0352290228009224,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027385556139051916,
"signal/frontier_coverage_1/centered_abs_mean": 0.19150737822055816,
"signal/frontier_coverage_1/group_std_mean": 0.25176058411598207,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0352290228009224,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027385556139051916,
"signal/frontier_coverage_10/centered_abs_mean": 0.19150737822055816,
"signal/frontier_coverage_10/group_std_mean": 0.25176058411598207,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0352290228009224,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027385556139051916,
"signal/frontier_coverage_15/centered_abs_mean": 0.19150737822055816,
"signal/frontier_coverage_15/group_std_mean": 0.25176058411598207,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0352290228009224,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027385556139051916,
"signal/frontier_coverage_20/centered_abs_mean": 0.19150737822055816,
"signal/frontier_coverage_20/group_std_mean": 0.25176058411598207,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0352290228009224,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027385556139051916,
"signal/frontier_coverage_25/centered_abs_mean": 0.19150737822055816,
"signal/frontier_coverage_25/group_std_mean": 0.25176058411598207,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0352290228009224,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027385556139051916,
"signal/frontier_coverage_5/centered_abs_mean": 0.19150737822055816,
"signal/frontier_coverage_5/group_std_mean": 0.25176058411598207,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0352290228009224,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027385556139051916,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32493494153022767,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39551191329956054,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.41893631815910337,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03249349407851696,
"step": 140
},
{
"calibration/aurc": 0.18233118348876434,
"calibration/batch_distribution_entropy": 0.9831680569385256,
"calibration/buffer_distribution_entropy": 0.9598290512239208,
"calibration/confidence_entropy": 0.4897688461891715,
"calibration/coverage@0%": 0.04602953909832603,
"calibration/coverage@1%": 0.04602953909832603,
"calibration/coverage@10%": 0.37472474066657796,
"calibration/coverage@15%": 0.47529519217066457,
"calibration/coverage@20%": 0.6035010202835199,
"calibration/coverage@25%": 0.7062519554070845,
"calibration/coverage@30%": 0.8091139749667903,
"calibration/coverage@5%": 0.14104869190855002,
"calibration/ece": 0.16769359034482495,
"calibration/mean_confidence": 0.5345419757715583,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.005468750000000022,
"completions/max_length": 3000.6,
"completions/max_terminated_length": 3000.6,
"completions/mean_length": 603.790283203125,
"completions/mean_terminated_length": 607.152490234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 175.6,
"epoch": 0.34799565005437433,
"grad_norm": 0.004024908412247896,
"learning_rate": 1.8975903614457832e-06,
"loss": -0.0099,
"num_tokens": 296382888.0,
"reward": 1.0045419931411743,
"reward_std": 0.10734798014163971,
"rewards/accuracy_reward": 0.7182291746139526,
"rewards/brier_reward": 0.797440505027771,
"rewards/confidence_uniqueness_reward": 0.9454038500785827,
"rewards/format_reward": 0.9941840171813965,
"rewards/frontier_coverage_0": -0.007990724965929985,
"rewards/frontier_coverage_1": -0.007990724965929985,
"rewards/frontier_coverage_10": -0.007990724965929985,
"rewards/frontier_coverage_15": -0.007990724965929985,
"rewards/frontier_coverage_20": -0.007990724965929985,
"rewards/frontier_coverage_25": -0.0050966314971446994,
"rewards/frontier_coverage_5": -0.007990724965929985,
"rewards/frontier_entropy_batch_reward": -0.25190583765506747,
"signal/accuracy_reward/centered_abs_mean": 0.12996961772441865,
"signal/accuracy_reward/group_std_mean": 0.18141130805015565,
"signal/accuracy_reward/group_zero_std_frac": 0.44166667461395265,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9297018647193909,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06498480886220932,
"signal/advantage_abs_mean": 0.7460544824600219,
"signal/advantage_pre_scale_abs_mean": 0.07965542376041412,
"signal/advantage_pre_scale_std": 0.12871635258197783,
"signal/advantage_std": 0.9830474495887757,
"signal/brier_reward/centered_abs_mean": 0.1287109524011612,
"signal/brier_reward/group_std_mean": 0.16658840775489808,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18521082699298858,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012871095538139343,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02220791019499302,
"signal/confidence_uniqueness_reward/group_std_mean": 0.033020298555493356,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.031845220178365705,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022207909962162374,
"signal/format_reward/centered_abs_mean": 0.009825303871184587,
"signal/format_reward/group_std_mean": 0.017753782123327254,
"signal/format_reward/group_zero_std_frac": 0.9277778029441833,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06998921409249306,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004912651935592293,
"signal/frontier_coverage_0/centered_abs_mean": 0.17401364147663118,
"signal/frontier_coverage_0/group_std_mean": 0.23170343041419983,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035727670043706895,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024883949663490057,
"signal/frontier_coverage_1/centered_abs_mean": 0.17401364147663118,
"signal/frontier_coverage_1/group_std_mean": 0.23170343041419983,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035727670043706895,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024883949663490057,
"signal/frontier_coverage_10/centered_abs_mean": 0.17401364147663118,
"signal/frontier_coverage_10/group_std_mean": 0.23170343041419983,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.035727670043706895,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024883949663490057,
"signal/frontier_coverage_15/centered_abs_mean": 0.17401364147663118,
"signal/frontier_coverage_15/group_std_mean": 0.23170343041419983,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.035727670043706895,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024883949663490057,
"signal/frontier_coverage_20/centered_abs_mean": 0.17401364147663118,
"signal/frontier_coverage_20/group_std_mean": 0.23170343041419983,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.035727670043706895,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024883949663490057,
"signal/frontier_coverage_25/centered_abs_mean": 0.15727486312389374,
"signal/frontier_coverage_25/group_std_mean": 0.20966436564922333,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03222865499556064,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022490305360406636,
"signal/frontier_coverage_5/centered_abs_mean": 0.17401364147663118,
"signal/frontier_coverage_5/group_std_mean": 0.23170343041419983,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035727670043706895,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024883949663490057,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30758561491966246,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3782775580883026,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.44249006509780886,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030758562684059142,
"step": 145
},
{
"calibration/aurc": 0.19084036200929547,
"calibration/batch_distribution_entropy": 0.9682289496498896,
"calibration/buffer_distribution_entropy": 0.9670171964368471,
"calibration/confidence_entropy": 0.5042296552565959,
"calibration/coverage@0%": 0.06598648652575967,
"calibration/coverage@1%": 0.0748870100859691,
"calibration/coverage@10%": 0.36375211584191663,
"calibration/coverage@15%": 0.4656610314172543,
"calibration/coverage@20%": 0.5612869886858138,
"calibration/coverage@25%": 0.6531385987815492,
"calibration/coverage@30%": 0.7883200065274152,
"calibration/coverage@5%": 0.29452257920003927,
"calibration/ece": 0.18335817487450937,
"calibration/mean_confidence": 0.5571029847794488,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004774305555555558,
"completions/max_length": 3239.2,
"completions/max_terminated_length": 3239.2,
"completions/mean_length": 701.5655395507813,
"completions/mean_terminated_length": 704.9570678710937,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.6,
"epoch": 0.3599955000562493,
"grad_norm": 0.0036906444001942873,
"learning_rate": 1.7469879518072292e-06,
"loss": -0.0084,
"num_tokens": 307575259.0,
"reward": 0.9870135068893433,
"reward_std": 0.12090798169374466,
"rewards/accuracy_reward": 0.6818576455116272,
"rewards/brier_reward": 0.8064930081367493,
"rewards/confidence_uniqueness_reward": 0.9434916973114014,
"rewards/format_reward": 0.9942708134651184,
"rewards/frontier_coverage_0": 0.02103922632522881,
"rewards/frontier_coverage_1": 0.02103922632522881,
"rewards/frontier_coverage_10": 0.02103922632522881,
"rewards/frontier_coverage_15": 0.02103922632522881,
"rewards/frontier_coverage_20": 0.021864201012067496,
"rewards/frontier_coverage_25": 0.04642558991909027,
"rewards/frontier_coverage_5": 0.02103922632522881,
"rewards/frontier_entropy_batch_reward": -0.28530060350894926,
"signal/accuracy_reward/centered_abs_mean": 0.1652289465069771,
"signal/accuracy_reward/group_std_mean": 0.2149661064147949,
"signal/accuracy_reward/group_zero_std_frac": 0.4,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0628995418548584,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08261447325348854,
"signal/advantage_abs_mean": 0.768857729434967,
"signal/advantage_pre_scale_abs_mean": 0.09277653992176056,
"signal/advantage_pre_scale_std": 0.141487255692482,
"signal/advantage_std": 0.9831639409065247,
"signal/brier_reward/centered_abs_mean": 0.13207932710647582,
"signal/brier_reward/group_std_mean": 0.16995641589164734,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17336148023605347,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013207933306694031,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02226933278143406,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0336017731577158,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029386086389422417,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002226933324709535,
"signal/format_reward/centered_abs_mean": 0.008897569379769266,
"signal/format_reward/group_std_mean": 0.017280596494674682,
"signal/format_reward/group_zero_std_frac": 0.925000011920929,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05693276599049568,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004448784689884633,
"signal/frontier_coverage_0/centered_abs_mean": 0.18552227616310119,
"signal/frontier_coverage_0/group_std_mean": 0.24258872568607331,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03458261713385582,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002652968605980277,
"signal/frontier_coverage_1/centered_abs_mean": 0.18552227616310119,
"signal/frontier_coverage_1/group_std_mean": 0.24258872568607331,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03458261713385582,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002652968605980277,
"signal/frontier_coverage_10/centered_abs_mean": 0.18552227616310119,
"signal/frontier_coverage_10/group_std_mean": 0.24258872568607331,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03458261713385582,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002652968605980277,
"signal/frontier_coverage_15/centered_abs_mean": 0.18552227616310119,
"signal/frontier_coverage_15/group_std_mean": 0.24258872568607331,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03458261713385582,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002652968605980277,
"signal/frontier_coverage_20/centered_abs_mean": 0.17669001817703248,
"signal/frontier_coverage_20/group_std_mean": 0.23156578838825226,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03303196430206299,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002526667295023799,
"signal/frontier_coverage_25/centered_abs_mean": 0.06840592995285988,
"signal/frontier_coverage_25/group_std_mean": 0.08922984004020691,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01295476108789444,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009782047942280768,
"signal/frontier_coverage_5/centered_abs_mean": 0.18552227616310119,
"signal/frontier_coverage_5/group_std_mean": 0.24258872568607331,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03458261713385582,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002652968605980277,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32859750390052794,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3984165847301483,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4377071440219879,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03285974971950054,
"step": 150
},
{
"epoch": 0.3599955000562493,
"eval_calibration/aurc": 0.13702523864879065,
"eval_calibration/batch_distribution_entropy": 0.9284841274227499,
"eval_calibration/buffer_distribution_entropy": 0.9708200938328989,
"eval_calibration/confidence_entropy": 0.4971687058154246,
"eval_calibration/coverage@0%": 0.26293682795698925,
"eval_calibration/coverage@1%": 0.26293682795698925,
"eval_calibration/coverage@10%": 0.5304099462365591,
"eval_calibration/coverage@15%": 0.6609543010752689,
"eval_calibration/coverage@20%": 0.7239583333333334,
"eval_calibration/coverage@25%": 0.8385416666666666,
"eval_calibration/coverage@30%": 0.9322916666666666,
"eval_calibration/coverage@5%": 0.28897849462365593,
"eval_calibration/ece": 0.1888974403920867,
"eval_calibration/mean_confidence": 0.5458924104876176,
"eval_completions/clipped_ratio": 0.0026041666666666665,
"eval_completions/max_length": 2327.3333333333335,
"eval_completions/max_terminated_length": 2327.3333333333335,
"eval_completions/mean_length": 692.9316202799479,
"eval_completions/mean_terminated_length": 694.7150370279948,
"eval_completions/min_length": 158.5,
"eval_completions/min_terminated_length": 230.16666666666666,
"eval_loss": 0.0,
"eval_num_tokens": 307575259.0,
"eval_reward": 0.9107913474241892,
"eval_reward_std": 0.2171098291873932,
"eval_rewards/accuracy_reward": 0.6788194378217062,
"eval_rewards/brier_reward": 0.8023928701877594,
"eval_rewards/confidence_uniqueness_reward": 0.8938767115275065,
"eval_rewards/format_reward": 0.9973958333333334,
"eval_rewards/frontier_coverage_0": 0.02134424013396104,
"eval_rewards/frontier_coverage_1": 0.02134424013396104,
"eval_rewards/frontier_coverage_10": 0.02134424013396104,
"eval_rewards/frontier_coverage_15": 0.02134424013396104,
"eval_rewards/frontier_coverage_20": 0.02452502477293213,
"eval_rewards/frontier_coverage_25": 0.0642988532781601,
"eval_rewards/frontier_coverage_5": 0.02134424013396104,
"eval_rewards/frontier_entropy_batch_reward": -0.9973958333333334,
"eval_runtime": 139.2104,
"eval_samples_per_second": 7.183,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4271918435891469,
"eval_signal/accuracy_reward/group_std_mean": 0.4689544787009557,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9881373941898346,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21359592179457346,
"eval_signal/advantage_abs_mean": 0.8833253582318624,
"eval_signal/advantage_pre_scale_abs_mean": 0.19192364563544592,
"eval_signal/advantage_pre_scale_std": 0.21469872941573462,
"eval_signal/advantage_std": 0.986367384592692,
"eval_signal/brier_reward/centered_abs_mean": 0.18057803561290106,
"eval_signal/brier_reward/group_std_mean": 0.23535025119781494,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08358132963379224,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.018057803623378277,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0436480101197958,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05746622569859028,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020154597237706184,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0043648009886965156,
"eval_signal/format_reward/centered_abs_mean": 0.0050455727614462376,
"eval_signal/format_reward/group_std_mean": 0.014731391333043575,
"eval_signal/format_reward/group_zero_std_frac": 0.9166666766007742,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011093226571877798,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0025227863807231188,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.29528985420862836,
"eval_signal/frontier_coverage_0/group_std_mean": 0.4035186717907588,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.019564516842365265,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004222644803424676,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.29528985420862836,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4035186717907588,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.019564516842365265,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004222644803424676,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.29528985420862836,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4035186717907588,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.019564516842365265,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004222644803424676,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.29528985420862836,
"eval_signal/frontier_coverage_15/group_std_mean": 0.4035186717907588,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.019564516842365265,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004222644803424676,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.23646595080693564,
"eval_signal/frontier_coverage_20/group_std_mean": 0.32997279862562817,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015677123485753935,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003381463116966188,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.09063521524270375,
"eval_signal/frontier_coverage_25/group_std_mean": 0.11626108984152476,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006016027880832553,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001296083559282124,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.29528985420862836,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4035186717907588,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.019564516842365265,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004222644803424676,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0050455727614462376,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.014731391333043575,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9166666766007742,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0022186453764637313,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0005045573343522847,
"eval_steps_per_second": 0.043,
"step": 150
},
{
"calibration/aurc": 0.14161491473574045,
"calibration/batch_distribution_entropy": 0.9785979939228288,
"calibration/buffer_distribution_entropy": 0.9725325260012785,
"calibration/confidence_entropy": 0.5026786576895561,
"calibration/coverage@0%": 0.030372153493650255,
"calibration/coverage@1%": 0.030372153493650255,
"calibration/coverage@10%": 0.48445113108853616,
"calibration/coverage@15%": 0.6141158725686856,
"calibration/coverage@20%": 0.7234010943737882,
"calibration/coverage@25%": 0.850517394479151,
"calibration/coverage@30%": 0.8960138340191106,
"calibration/coverage@5%": 0.2579811082970747,
"calibration/ece": 0.1825885505288956,
"calibration/mean_confidence": 0.5590493941529713,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.003993055555555558,
"completions/max_length": 3360.2,
"completions/max_terminated_length": 3360.2,
"completions/mean_length": 680.30234375,
"completions/mean_terminated_length": 683.0581176757812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 207.4,
"epoch": 0.3719953500581243,
"grad_norm": 0.003881107782945037,
"learning_rate": 1.5963855421686747e-06,
"loss": -0.0034,
"num_tokens": 318520054.0,
"reward": 1.0143356561660766,
"reward_std": 0.11488137692213059,
"rewards/accuracy_reward": 0.7287326455116272,
"rewards/brier_reward": 0.8206124186515809,
"rewards/confidence_uniqueness_reward": 0.9463862299919128,
"rewards/format_reward": 0.9959201574325561,
"rewards/frontier_coverage_0": 0.008624611730920152,
"rewards/frontier_coverage_1": 0.008624611730920152,
"rewards/frontier_coverage_10": 0.008623575296951458,
"rewards/frontier_coverage_15": 0.008593602268956602,
"rewards/frontier_coverage_20": 0.019597085565328597,
"rewards/frontier_coverage_25": 0.09769158065319061,
"rewards/frontier_coverage_5": 0.008624611730920152,
"rewards/frontier_entropy_batch_reward": -0.26984030604362486,
"signal/accuracy_reward/centered_abs_mean": 0.15015733242034912,
"signal/accuracy_reward/group_std_mean": 0.20241186618804932,
"signal/accuracy_reward/group_zero_std_frac": 0.4083333432674408,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0236715793609619,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07507866621017456,
"signal/advantage_abs_mean": 0.7554642915725708,
"signal/advantage_pre_scale_abs_mean": 0.0852625235915184,
"signal/advantage_pre_scale_std": 0.13472781628370284,
"signal/advantage_std": 0.9831080079078675,
"signal/brier_reward/centered_abs_mean": 0.1314813494682312,
"signal/brier_reward/group_std_mean": 0.16985029578208924,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18111539185047149,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013148135691881179,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020419245585799217,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03257020190358162,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02790914885699749,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020419245585799215,
"signal/format_reward/centered_abs_mean": 0.007546658022329211,
"signal/format_reward/group_std_mean": 0.016791296564042567,
"signal/format_reward/group_zero_std_frac": 0.9222222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05060187578201294,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0037733290111646054,
"signal/frontier_coverage_0/centered_abs_mean": 0.19345370233058928,
"signal/frontier_coverage_0/group_std_mean": 0.2506356716156006,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03803465738892555,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027663879096508025,
"signal/frontier_coverage_1/centered_abs_mean": 0.19345370233058928,
"signal/frontier_coverage_1/group_std_mean": 0.2506356716156006,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03803465738892555,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027663879096508025,
"signal/frontier_coverage_10/centered_abs_mean": 0.19344922304153442,
"signal/frontier_coverage_10/group_std_mean": 0.2506299793720245,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03803384155035019,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002766323834657669,
"signal/frontier_coverage_15/centered_abs_mean": 0.19292726516723632,
"signal/frontier_coverage_15/group_std_mean": 0.2499801516532898,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03793646469712257,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027588598895817995,
"signal/frontier_coverage_20/centered_abs_mean": 0.1353215456008911,
"signal/frontier_coverage_20/group_std_mean": 0.17713548839092255,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02666233666241169,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001935098133981228,
"signal/frontier_coverage_25/centered_abs_mean": 0.08023149967193603,
"signal/frontier_coverage_25/group_std_mean": 0.10117035806179046,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015790591202676296,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001147310435771942,
"signal/frontier_coverage_5/centered_abs_mean": 0.19345370233058928,
"signal/frontier_coverage_5/group_std_mean": 0.2506356716156006,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03803465738892555,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027663879096508025,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3274266362190247,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39751541018486025,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4508472442626953,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03274266496300697,
"step": 155
},
{
"calibration/aurc": 0.09399491959647298,
"calibration/batch_distribution_entropy": 0.9412546711952994,
"calibration/buffer_distribution_entropy": 0.9754604476125126,
"calibration/confidence_entropy": 0.4770566756690894,
"calibration/coverage@0%": 0.09393830903780045,
"calibration/coverage@1%": 0.09393830903780045,
"calibration/coverage@10%": 0.7389721201296986,
"calibration/coverage@15%": 0.843777086332959,
"calibration/coverage@20%": 0.880931680869679,
"calibration/coverage@25%": 0.9050462602561364,
"calibration/coverage@30%": 0.9182058047493402,
"calibration/coverage@5%": 0.5374245034399536,
"calibration/ece": 0.16942426673168062,
"calibration/mean_confidence": 0.6137393061839143,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006770833333333326,
"completions/max_length": 3504.0,
"completions/max_terminated_length": 3504.0,
"completions/mean_length": 693.0671875,
"completions/mean_terminated_length": 697.8524291992187,
"completions/min_length": 0.0,
"completions/min_terminated_length": 187.6,
"epoch": 0.38399520005999926,
"grad_norm": 0.003740179119631648,
"learning_rate": 1.4457831325301204e-06,
"loss": -0.0148,
"num_tokens": 329591484.0,
"reward": 0.9867475509643555,
"reward_std": 0.1178449347615242,
"rewards/accuracy_reward": 0.6817708253860474,
"rewards/brier_reward": 0.8016348242759704,
"rewards/confidence_uniqueness_reward": 0.9426298260688781,
"rewards/format_reward": 0.9927083253860474,
"rewards/frontier_coverage_0": 0.02299555651843548,
"rewards/frontier_coverage_1": 0.02299555651843548,
"rewards/frontier_coverage_10": 0.02299380600452423,
"rewards/frontier_coverage_15": 0.022937557473778725,
"rewards/frontier_coverage_20": 0.027411183714866637,
"rewards/frontier_coverage_25": 0.10199409276247025,
"rewards/frontier_coverage_5": 0.02299555651843548,
"rewards/frontier_entropy_batch_reward": -0.28412319123744967,
"signal/accuracy_reward/centered_abs_mean": 0.14393446147441863,
"signal/accuracy_reward/group_std_mean": 0.1882144957780838,
"signal/accuracy_reward/group_zero_std_frac": 0.47222222089767457,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0052073359489442,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07196723073720931,
"signal/advantage_abs_mean": 0.7603908181190491,
"signal/advantage_pre_scale_abs_mean": 0.08789721131324768,
"signal/advantage_pre_scale_std": 0.14016545712947845,
"signal/advantage_std": 0.9830867886543274,
"signal/brier_reward/centered_abs_mean": 0.13717943131923677,
"signal/brier_reward/group_std_mean": 0.17664145231246947,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19173648059368134,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013717942871153355,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025140639021992685,
"signal/confidence_uniqueness_reward/group_std_mean": 0.041734833270311356,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.034909750893712045,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025140638928860424,
"signal/format_reward/centered_abs_mean": 0.012847222201526166,
"signal/format_reward/group_std_mean": 0.026805402338504793,
"signal/format_reward/group_zero_std_frac": 0.8805555701255798,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08747010976076126,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006423611100763083,
"signal/frontier_coverage_0/centered_abs_mean": 0.1893948495388031,
"signal/frontier_coverage_0/group_std_mean": 0.24652081727981567,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.037821638584136966,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002708346350118518,
"signal/frontier_coverage_1/centered_abs_mean": 0.1893948495388031,
"signal/frontier_coverage_1/group_std_mean": 0.24652081727981567,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.037821638584136966,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002708346350118518,
"signal/frontier_coverage_10/centered_abs_mean": 0.18938452005386353,
"signal/frontier_coverage_10/group_std_mean": 0.2465077221393585,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.037819582223892215,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002708198828622699,
"signal/frontier_coverage_15/centered_abs_mean": 0.18859367370605468,
"signal/frontier_coverage_15/group_std_mean": 0.24550087451934816,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03766307979822159,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026968895457684995,
"signal/frontier_coverage_20/centered_abs_mean": 0.1072016030550003,
"signal/frontier_coverage_20/group_std_mean": 0.139630264043808,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02150789238512516,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015329829417169093,
"signal/frontier_coverage_25/centered_abs_mean": 0.08940613269805908,
"signal/frontier_coverage_25/group_std_mean": 0.11217249184846878,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017939681187272072,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012785077095031738,
"signal/frontier_coverage_5/centered_abs_mean": 0.1893948495388031,
"signal/frontier_coverage_5/group_std_mean": 0.24652081727981567,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.037821638584136966,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002708346350118518,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32551802396774293,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39456554055213927,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.45712563395500183,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03255180567502976,
"step": 160
},
{
"calibration/aurc": 0.12667895964624948,
"calibration/batch_distribution_entropy": 0.9667500631719177,
"calibration/buffer_distribution_entropy": 0.9768246962707771,
"calibration/confidence_entropy": 0.4787488938658747,
"calibration/coverage@0%": 0.08587612683228549,
"calibration/coverage@1%": 0.08587612683228549,
"calibration/coverage@10%": 0.5596784657665852,
"calibration/coverage@15%": 0.658714518716747,
"calibration/coverage@20%": 0.7396279283724964,
"calibration/coverage@25%": 0.8107754068022667,
"calibration/coverage@30%": 0.8813963204972873,
"calibration/coverage@5%": 0.3803046427900564,
"calibration/ece": 0.20473769314593335,
"calibration/mean_confidence": 0.5073165586899123,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009895833333333348,
"completions/max_length": 3408.6,
"completions/max_terminated_length": 3408.6,
"completions/mean_length": 766.8527099609375,
"completions/mean_terminated_length": 774.5172241210937,
"completions/min_length": 0.0,
"completions/min_terminated_length": 247.6,
"epoch": 0.39599505006187424,
"grad_norm": 0.0031843557953834534,
"learning_rate": 1.2951807228915664e-06,
"loss": -0.0273,
"num_tokens": 341564699.0,
"reward": 0.9761411070823669,
"reward_std": 0.11881034970283508,
"rewards/accuracy_reward": 0.6574652791023254,
"rewards/brier_reward": 0.7964750170707703,
"rewards/confidence_uniqueness_reward": 0.9407760500907898,
"rewards/format_reward": 0.9895833253860473,
"rewards/frontier_coverage_0": 0.04257221892476082,
"rewards/frontier_coverage_1": 0.04257221892476082,
"rewards/frontier_coverage_10": 0.04257510676980018,
"rewards/frontier_coverage_15": 0.04277926944196224,
"rewards/frontier_coverage_20": 0.046192364767193794,
"rewards/frontier_coverage_25": 0.10422454476356506,
"rewards/frontier_coverage_5": 0.04257221892476082,
"rewards/frontier_entropy_batch_reward": -0.26306197941303255,
"signal/accuracy_reward/centered_abs_mean": 0.1366536423563957,
"signal/accuracy_reward/group_std_mean": 0.18269501626491547,
"signal/accuracy_reward/group_zero_std_frac": 0.4638888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9548314452171326,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06832682117819786,
"signal/advantage_abs_mean": 0.7397696733474731,
"signal/advantage_pre_scale_abs_mean": 0.08615766167640686,
"signal/advantage_pre_scale_std": 0.1432511866092682,
"signal/advantage_std": 0.9830728769302368,
"signal/brier_reward/centered_abs_mean": 0.13853301703929902,
"signal/brier_reward/group_std_mean": 0.17815548181533813,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1958606421947479,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013853302784264087,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02927153408527374,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05202648937702179,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04150531962513924,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002927153604105115,
"signal/format_reward/centered_abs_mean": 0.01867404468357563,
"signal/format_reward/group_std_mean": 0.03949138410389423,
"signal/format_reward/group_zero_std_frac": 0.8222222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1326592281460762,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009337022341787815,
"signal/frontier_coverage_0/centered_abs_mean": 0.20547735095024108,
"signal/frontier_coverage_0/group_std_mean": 0.2617917537689209,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.041419435292482376,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002938326168805361,
"signal/frontier_coverage_1/centered_abs_mean": 0.20547735095024108,
"signal/frontier_coverage_1/group_std_mean": 0.2617917537689209,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.041419435292482376,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002938326168805361,
"signal/frontier_coverage_10/centered_abs_mean": 0.20546633899211883,
"signal/frontier_coverage_10/group_std_mean": 0.2617780089378357,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04141724780201912,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029381686355918644,
"signal/frontier_coverage_15/centered_abs_mean": 0.2041507601737976,
"signal/frontier_coverage_15/group_std_mean": 0.2601293295621872,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04115338325500488,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029193558264523746,
"signal/frontier_coverage_20/centered_abs_mean": 0.09043723046779632,
"signal/frontier_coverage_20/group_std_mean": 0.11598165482282638,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018281865678727627,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012932523852214218,
"signal/frontier_coverage_25/centered_abs_mean": 0.0844599574804306,
"signal/frontier_coverage_25/group_std_mean": 0.10744838416576385,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017018306627869607,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012077773921191693,
"signal/frontier_coverage_5/centered_abs_mean": 0.20547735095024108,
"signal/frontier_coverage_5/group_std_mean": 0.2617917537689209,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.041419435292482376,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002938326168805361,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32049464583396914,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39367471933364867,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4526883363723755,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03204946555197239,
"step": 165
},
{
"calibration/aurc": 0.12042923465880181,
"calibration/batch_distribution_entropy": 0.9231336191494597,
"calibration/buffer_distribution_entropy": 0.9772784581546675,
"calibration/confidence_entropy": 0.4722359481486856,
"calibration/coverage@0%": 0.02619224996737286,
"calibration/coverage@1%": 0.02619224996737286,
"calibration/coverage@10%": 0.5754376567915199,
"calibration/coverage@15%": 0.6780955222879599,
"calibration/coverage@20%": 0.8664804654079841,
"calibration/coverage@25%": 0.9255724249938371,
"calibration/coverage@30%": 0.9763779527559056,
"calibration/coverage@5%": 0.21658759117472193,
"calibration/ece": 0.11244536955518544,
"calibration/mean_confidence": 0.6432379778464519,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010677083333333327,
"completions/max_length": 3379.8,
"completions/max_terminated_length": 3379.8,
"completions/mean_length": 764.9436767578125,
"completions/mean_terminated_length": 773.263427734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 232.4,
"epoch": 0.4079949000637492,
"grad_norm": 0.0034331590868532658,
"learning_rate": 1.1445783132530121e-06,
"loss": -0.0318,
"num_tokens": 353466034.0,
"reward": 1.001962959766388,
"reward_std": 0.12031054049730301,
"rewards/accuracy_reward": 0.7228298544883728,
"rewards/brier_reward": 0.8268496513366699,
"rewards/confidence_uniqueness_reward": 0.9339795589447022,
"rewards/format_reward": 0.9889756798744201,
"rewards/frontier_coverage_0": 0.018969600554555655,
"rewards/frontier_coverage_1": 0.018969600554555655,
"rewards/frontier_coverage_10": 0.01897885270882398,
"rewards/frontier_coverage_15": 0.02006477633258328,
"rewards/frontier_coverage_20": 0.05282620638608933,
"rewards/frontier_coverage_25": 0.15781393945217131,
"rewards/frontier_coverage_5": 0.018969600554555655,
"rewards/frontier_entropy_batch_reward": -0.34407015442848204,
"signal/accuracy_reward/centered_abs_mean": 0.13177625983953475,
"signal/accuracy_reward/group_std_mean": 0.1782878965139389,
"signal/accuracy_reward/group_zero_std_frac": 0.48055556416511536,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.957055127620697,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06588812991976738,
"signal/advantage_abs_mean": 0.7518744587898254,
"signal/advantage_pre_scale_abs_mean": 0.08772747218608856,
"signal/advantage_pre_scale_std": 0.14746004790067674,
"signal/advantage_std": 0.9830225229263305,
"signal/brier_reward/centered_abs_mean": 0.12697158604860306,
"signal/brier_reward/group_std_mean": 0.16580133736133576,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18501022160053254,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012697158567607402,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03250643089413643,
"signal/confidence_uniqueness_reward/group_std_mean": 0.054881346970796586,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0470227912068367,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003250643378123641,
"signal/format_reward/centered_abs_mean": 0.01877712644636631,
"signal/format_reward/group_std_mean": 0.03860697820782662,
"signal/format_reward/group_zero_std_frac": 0.8305555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.13337477520108224,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009388563223183155,
"signal/frontier_coverage_0/centered_abs_mean": 0.1576144963502884,
"signal/frontier_coverage_0/group_std_mean": 0.21049812138080598,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03282146006822586,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022538872435688972,
"signal/frontier_coverage_1/centered_abs_mean": 0.1576144963502884,
"signal/frontier_coverage_1/group_std_mean": 0.21049812138080598,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03282146006822586,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022538872435688972,
"signal/frontier_coverage_10/centered_abs_mean": 0.15755284130573272,
"signal/frontier_coverage_10/group_std_mean": 0.2104198604822159,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03280867114663124,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022530056070536376,
"signal/frontier_coverage_15/centered_abs_mean": 0.1547604590654373,
"signal/frontier_coverage_15/group_std_mean": 0.20678509175777435,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03222393654286861,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002213074592873454,
"signal/frontier_coverage_20/centered_abs_mean": 0.06509168595075607,
"signal/frontier_coverage_20/group_std_mean": 0.08332156985998154,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01360565610229969,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009308110922574997,
"signal/frontier_coverage_25/centered_abs_mean": 0.10484070777893066,
"signal/frontier_coverage_25/group_std_mean": 0.13356612026691436,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02198589891195297,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014992221491411327,
"signal/frontier_coverage_5/centered_abs_mean": 0.1576144963502884,
"signal/frontier_coverage_5/group_std_mean": 0.21049812138080598,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03282146006822586,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022538872435688972,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34670414328575133,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41445213556289673,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5086399018764496,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03467041626572609,
"step": 170
},
{
"calibration/aurc": 0.09662731950335442,
"calibration/batch_distribution_entropy": 0.967024441243046,
"calibration/buffer_distribution_entropy": 0.9772981503148188,
"calibration/confidence_entropy": 0.5046960184786891,
"calibration/coverage@0%": 0.08649086696074063,
"calibration/coverage@1%": 0.1979207803453709,
"calibration/coverage@10%": 0.6744557656695441,
"calibration/coverage@15%": 0.7621469822036061,
"calibration/coverage@20%": 0.850881153068473,
"calibration/coverage@25%": 0.9165939939987974,
"calibration/coverage@30%": 0.9591402590245247,
"calibration/coverage@5%": 0.3844438926821537,
"calibration/ece": 0.16998348481003883,
"calibration/mean_confidence": 0.5691553376825024,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009201388888888907,
"completions/max_length": 3807.2,
"completions/max_terminated_length": 3807.2,
"completions/mean_length": 819.2960205078125,
"completions/mean_terminated_length": 826.9532104492188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 258.4,
"epoch": 0.4199947500656242,
"grad_norm": 0.0033503889571875334,
"learning_rate": 9.93975903614458e-07,
"loss": -0.0224,
"num_tokens": 366012292.0,
"reward": 1.0019341111183167,
"reward_std": 0.12585299015045165,
"rewards/accuracy_reward": 0.7077257037162781,
"rewards/brier_reward": 0.8151641726493836,
"rewards/confidence_uniqueness_reward": 0.9411271095275879,
"rewards/format_reward": 0.9901041746139526,
"rewards/frontier_coverage_0": 0.014120917581021786,
"rewards/frontier_coverage_1": 0.014120917581021786,
"rewards/frontier_coverage_10": 0.014142588526010514,
"rewards/frontier_coverage_15": 0.014934336580336095,
"rewards/frontier_coverage_20": 0.053513363003730774,
"rewards/frontier_coverage_25": 0.1392007663846016,
"rewards/frontier_coverage_5": 0.014121649414300918,
"rewards/frontier_entropy_batch_reward": -0.26387418806552887,
"signal/accuracy_reward/centered_abs_mean": 0.15167643427848815,
"signal/accuracy_reward/group_std_mean": 0.2053221881389618,
"signal/accuracy_reward/group_zero_std_frac": 0.4055555582046509,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.008397400379181,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07583821713924407,
"signal/advantage_abs_mean": 0.7449156880378723,
"signal/advantage_pre_scale_abs_mean": 0.09188491851091385,
"signal/advantage_pre_scale_std": 0.1496666193008423,
"signal/advantage_std": 0.9831571817398072,
"signal/brier_reward/centered_abs_mean": 0.12859825491905214,
"signal/brier_reward/group_std_mean": 0.16735298037528992,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1708065688610077,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012859826162457465,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02758704237639904,
"signal/confidence_uniqueness_reward/group_std_mean": 0.045367203652858734,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03653429411351681,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027587041724473236,
"signal/format_reward/centered_abs_mean": 0.016514757089316844,
"signal/format_reward/group_std_mean": 0.032019348442554475,
"signal/format_reward/group_zero_std_frac": 0.8638888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10936851501464843,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008257378544658422,
"signal/frontier_coverage_0/centered_abs_mean": 0.18040508925914764,
"signal/frontier_coverage_0/group_std_mean": 0.23544572591781615,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03435261063277721,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025797927286475898,
"signal/frontier_coverage_1/centered_abs_mean": 0.18040508925914764,
"signal/frontier_coverage_1/group_std_mean": 0.23544572591781615,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03435261063277721,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025797927286475898,
"signal/frontier_coverage_10/centered_abs_mean": 0.18032192587852477,
"signal/frontier_coverage_10/group_std_mean": 0.235341677069664,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03433669619262218,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025786036625504495,
"signal/frontier_coverage_15/centered_abs_mean": 0.1752968579530716,
"signal/frontier_coverage_15/group_std_mean": 0.22891083657741546,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.033378247171640396,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002506745047867298,
"signal/frontier_coverage_20/centered_abs_mean": 0.06402314454317093,
"signal/frontier_coverage_20/group_std_mean": 0.08175744861364365,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012168211303651333,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009155309875495732,
"signal/frontier_coverage_25/centered_abs_mean": 0.09939492493867874,
"signal/frontier_coverage_25/group_std_mean": 0.12737924307584764,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018856792896986007,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001421347470022738,
"signal/frontier_coverage_5/centered_abs_mean": 0.18040342926979064,
"signal/frontier_coverage_5/group_std_mean": 0.23544372022151946,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.034352288022637366,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002579768933355808,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3185386657714844,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3876194655895233,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.42217653393745425,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0318538673222065,
"step": 175
},
{
"calibration/aurc": 0.08526290527356786,
"calibration/batch_distribution_entropy": 0.9492263091629465,
"calibration/buffer_distribution_entropy": 0.9778171307399122,
"calibration/confidence_entropy": 0.49997622614018694,
"calibration/coverage@0%": 0.05113034229386461,
"calibration/coverage@1%": 0.09365002733323467,
"calibration/coverage@10%": 0.7473668466249658,
"calibration/coverage@15%": 0.8666818025240637,
"calibration/coverage@20%": 0.9463922907613297,
"calibration/coverage@25%": 0.9740053050397878,
"calibration/coverage@30%": 0.9946949602122016,
"calibration/coverage@5%": 0.3784139398978424,
"calibration/ece": 0.166516695461729,
"calibration/mean_confidence": 0.6187747295165235,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009982638888888862,
"completions/max_length": 3491.4,
"completions/max_terminated_length": 3491.4,
"completions/mean_length": 794.2389770507813,
"completions/mean_terminated_length": 802.2543701171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 254.2,
"epoch": 0.4319946000674992,
"grad_norm": 0.003332644235342741,
"learning_rate": 8.433734939759036e-07,
"loss": -0.0247,
"num_tokens": 378261893.0,
"reward": 1.0071517705917359,
"reward_std": 0.12117233127355576,
"rewards/accuracy_reward": 0.725781238079071,
"rewards/brier_reward": 0.8080682635307312,
"rewards/confidence_uniqueness_reward": 0.938874113559723,
"rewards/format_reward": 0.9896701335906982,
"rewards/frontier_coverage_0": -0.003929438255727291,
"rewards/frontier_coverage_1": -0.003929438255727291,
"rewards/frontier_coverage_10": -0.0038942765444517136,
"rewards/frontier_coverage_15": -0.004395973100326955,
"rewards/frontier_coverage_20": 0.05808563455939293,
"rewards/frontier_coverage_25": 0.14676995277404786,
"rewards/frontier_coverage_5": -0.003927422594279051,
"rewards/frontier_entropy_batch_reward": -0.27910477519035337,
"signal/accuracy_reward/centered_abs_mean": 0.1342068150639534,
"signal/accuracy_reward/group_std_mean": 0.17787945568561553,
"signal/accuracy_reward/group_zero_std_frac": 0.4888888895511627,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9742860913276672,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0671034075319767,
"signal/advantage_abs_mean": 0.7551454067230224,
"signal/advantage_pre_scale_abs_mean": 0.08928989768028259,
"signal/advantage_pre_scale_std": 0.14911575615406036,
"signal/advantage_std": 0.9830282688140869,
"signal/brier_reward/centered_abs_mean": 0.1267807200551033,
"signal/brier_reward/group_std_mean": 0.16448463797569274,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18453309237957,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012678072415292263,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029533155634999274,
"signal/confidence_uniqueness_reward/group_std_mean": 0.050531229376792906,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04347648099064827,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029533156659454106,
"signal/format_reward/centered_abs_mean": 0.018115234375,
"signal/format_reward/group_std_mean": 0.036967866495251654,
"signal/format_reward/group_zero_std_frac": 0.8361111283302307,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.13411470055580138,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0090576171875,
"signal/frontier_coverage_0/centered_abs_mean": 0.1632930189371109,
"signal/frontier_coverage_0/group_std_mean": 0.21255632638931274,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03398923799395561,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023350901901721954,
"signal/frontier_coverage_1/centered_abs_mean": 0.1632930189371109,
"signal/frontier_coverage_1/group_std_mean": 0.21255632638931274,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03398923799395561,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023350901901721954,
"signal/frontier_coverage_10/centered_abs_mean": 0.1632182240486145,
"signal/frontier_coverage_10/group_std_mean": 0.21246310472488403,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03397372327744961,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023340205661952496,
"signal/frontier_coverage_15/centered_abs_mean": 0.15129518806934356,
"signal/frontier_coverage_15/group_std_mean": 0.1972308337688446,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03146095797419548,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021635211771354078,
"signal/frontier_coverage_20/centered_abs_mean": 0.0666133850812912,
"signal/frontier_coverage_20/group_std_mean": 0.08397864252328872,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01388755403459072,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009525713743641972,
"signal/frontier_coverage_25/centered_abs_mean": 0.10736925154924393,
"signal/frontier_coverage_25/group_std_mean": 0.13568062484264373,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.022355619445443155,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015353802824392914,
"signal/frontier_coverage_5/centered_abs_mean": 0.16328986287117003,
"signal/frontier_coverage_5/group_std_mean": 0.21255233883857727,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.033988584950566295,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002335045067593455,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.321364963054657,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38714643120765685,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4681834578514099,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03213649578392506,
"step": 180
},
{
"calibration/aurc": 0.147813420478697,
"calibration/batch_distribution_entropy": 0.9615515889414834,
"calibration/buffer_distribution_entropy": 0.9768709358020142,
"calibration/confidence_entropy": 0.48301767115068583,
"calibration/coverage@0%": 0.021637299525751035,
"calibration/coverage@1%": 0.021637299525751035,
"calibration/coverage@10%": 0.29680602288229385,
"calibration/coverage@15%": 0.7420242355127942,
"calibration/coverage@20%": 0.8569570879698016,
"calibration/coverage@25%": 0.938528549490437,
"calibration/coverage@30%": 0.9830238726790451,
"calibration/coverage@5%": 0.04941376570265051,
"calibration/ece": 0.18628405567119197,
"calibration/mean_confidence": 0.5851708761244938,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010503472222222232,
"completions/max_length": 3336.4,
"completions/max_terminated_length": 3336.4,
"completions/mean_length": 798.7246704101562,
"completions/mean_terminated_length": 807.3341674804688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 219.0,
"epoch": 0.44399445006937416,
"grad_norm": 0.0037687718868255615,
"learning_rate": 6.927710843373495e-07,
"loss": -0.026,
"num_tokens": 390553249.0,
"reward": 0.9922348737716675,
"reward_std": 0.1255135342478752,
"rewards/accuracy_reward": 0.6885416626930236,
"rewards/brier_reward": 0.8148928165435791,
"rewards/confidence_uniqueness_reward": 0.939547860622406,
"rewards/format_reward": 0.9893229126930236,
"rewards/frontier_coverage_0": 0.02959106657654047,
"rewards/frontier_coverage_1": 0.02959106657654047,
"rewards/frontier_coverage_10": 0.029620955046266318,
"rewards/frontier_coverage_15": 0.030684778385329993,
"rewards/frontier_coverage_20": 0.07125861793756486,
"rewards/frontier_coverage_25": 0.15161574482917786,
"rewards/frontier_coverage_5": 0.029591639526188374,
"rewards/frontier_entropy_batch_reward": -0.2746046096086502,
"signal/accuracy_reward/centered_abs_mean": 0.15325520634651185,
"signal/accuracy_reward/group_std_mean": 0.20086986124515532,
"signal/accuracy_reward/group_zero_std_frac": 0.42777777910232545,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.050264871120453,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07662760317325593,
"signal/advantage_abs_mean": 0.7525103449821472,
"signal/advantage_pre_scale_abs_mean": 0.09348735362291336,
"signal/advantage_pre_scale_std": 0.15054976642131807,
"signal/advantage_std": 0.9831097364425659,
"signal/brier_reward/centered_abs_mean": 0.13104279041290284,
"signal/brier_reward/group_std_mean": 0.16971164345741271,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18007910549640654,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013104279339313508,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02803487591445446,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04839293137192726,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.038316420093178746,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028034877497702835,
"signal/format_reward/centered_abs_mean": 0.01681315116584301,
"signal/format_reward/group_std_mean": 0.03504555374383926,
"signal/format_reward/group_zero_std_frac": 0.8416666626930237,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1139563001692295,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008406575582921506,
"signal/frontier_coverage_0/centered_abs_mean": 0.1829205185174942,
"signal/frontier_coverage_0/group_std_mean": 0.23725357055664062,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035879862308502194,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026157634798437356,
"signal/frontier_coverage_1/centered_abs_mean": 0.1829205185174942,
"signal/frontier_coverage_1/group_std_mean": 0.23725357055664062,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035879862308502194,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026157634798437356,
"signal/frontier_coverage_10/centered_abs_mean": 0.18279743790626526,
"signal/frontier_coverage_10/group_std_mean": 0.23710041046142577,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03585578799247742,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026140033267438413,
"signal/frontier_coverage_15/centered_abs_mean": 0.16599198877811433,
"signal/frontier_coverage_15/group_std_mean": 0.2160373091697693,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03256378434598446,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023736854549497367,
"signal/frontier_coverage_20/centered_abs_mean": 0.07057978808879853,
"signal/frontier_coverage_20/group_std_mean": 0.08835373222827911,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.013883821666240692,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010092909797094762,
"signal/frontier_coverage_25/centered_abs_mean": 0.10900045037269593,
"signal/frontier_coverage_25/group_std_mean": 0.13839271068572997,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02145172506570816,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015587064437568188,
"signal/frontier_coverage_5/centered_abs_mean": 0.18291856944561005,
"signal/frontier_coverage_5/group_std_mean": 0.23725113570690154,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03587948232889175,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002615735540166497,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3228137791156769,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39099804162979124,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.44452112913131714,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032281379029154775,
"step": 185
},
{
"calibration/aurc": 0.10794396304820482,
"calibration/batch_distribution_entropy": 0.9318776316967142,
"calibration/buffer_distribution_entropy": 0.9766401009144714,
"calibration/confidence_entropy": 0.48874635286794427,
"calibration/coverage@0%": 0.02941019738198198,
"calibration/coverage@1%": 0.02941019738198198,
"calibration/coverage@10%": 0.566576001128133,
"calibration/coverage@15%": 0.7814061012715239,
"calibration/coverage@20%": 0.9241913307868799,
"calibration/coverage@25%": 0.9963446475195823,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.39669069301030124,
"calibration/ece": 0.17917218190945836,
"calibration/mean_confidence": 0.6358024523294838,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006770833333333326,
"completions/max_length": 3528.6,
"completions/max_terminated_length": 3528.6,
"completions/mean_length": 782.6207641601562,
"completions/mean_terminated_length": 787.9698608398437,
"completions/min_length": 0.0,
"completions/min_terminated_length": 245.4,
"epoch": 0.45599430007124914,
"grad_norm": 0.004032325465232134,
"learning_rate": 5.421686746987952e-07,
"loss": -0.0193,
"num_tokens": 402651984.0,
"reward": 1.0132048964500426,
"reward_std": 0.11641700565814972,
"rewards/accuracy_reward": 0.7356770753860473,
"rewards/brier_reward": 0.8206545829772949,
"rewards/confidence_uniqueness_reward": 0.9407994031906128,
"rewards/format_reward": 0.9932291626930236,
"rewards/frontier_coverage_0": 0.005415836116299033,
"rewards/frontier_coverage_1": 0.005415836116299033,
"rewards/frontier_coverage_10": 0.005474161216989159,
"rewards/frontier_coverage_15": 0.009392570797353983,
"rewards/frontier_coverage_20": 0.0829792320728302,
"rewards/frontier_coverage_25": 0.17440233528614044,
"rewards/frontier_coverage_5": 0.005415877094492316,
"rewards/frontier_entropy_batch_reward": -0.31519128680229186,
"signal/accuracy_reward/centered_abs_mean": 0.14232313483953477,
"signal/accuracy_reward/group_std_mean": 0.18668197989463806,
"signal/accuracy_reward/group_zero_std_frac": 0.4694444537162781,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0415077209472656,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07116156741976738,
"signal/advantage_abs_mean": 0.7595191597938538,
"signal/advantage_pre_scale_abs_mean": 0.08674072474241257,
"signal/advantage_pre_scale_std": 0.14214332550764083,
"signal/advantage_std": 0.9830122709274292,
"signal/brier_reward/centered_abs_mean": 0.12559207975864412,
"signal/brier_reward/group_std_mean": 0.16448963582515716,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18423607349395751,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01255920883268118,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025163330510258673,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0417561799287796,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0369965672492981,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002516333060339093,
"signal/format_reward/centered_abs_mean": 0.0123046875,
"signal/format_reward/group_std_mean": 0.0261313796043396,
"signal/format_reward/group_zero_std_frac": 0.8833333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08963212668895722,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00615234375,
"signal/frontier_coverage_0/centered_abs_mean": 0.17310574054718017,
"signal/frontier_coverage_0/group_std_mean": 0.22561688125133514,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036221811175346376,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024754120968282223,
"signal/frontier_coverage_1/centered_abs_mean": 0.17310574054718017,
"signal/frontier_coverage_1/group_std_mean": 0.22561688125133514,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036221811175346376,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024754120968282223,
"signal/frontier_coverage_10/centered_abs_mean": 0.17299672365188598,
"signal/frontier_coverage_10/group_std_mean": 0.22547802329063416,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.036198879778385165,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024738531094044445,
"signal/frontier_coverage_15/centered_abs_mean": 0.15460915565490724,
"signal/frontier_coverage_15/group_std_mean": 0.20132783949375152,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03233877532184124,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022109109442681072,
"signal/frontier_coverage_20/centered_abs_mean": 0.07516934722661972,
"signal/frontier_coverage_20/group_std_mean": 0.09350252896547318,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015893686562776566,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010749216424301266,
"signal/frontier_coverage_25/centered_abs_mean": 0.11685722768306732,
"signal/frontier_coverage_25/group_std_mean": 0.14719865024089812,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.024771924316883086,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016710583819076418,
"signal/frontier_coverage_5/centered_abs_mean": 0.1731052041053772,
"signal/frontier_coverage_5/group_std_mean": 0.22561621367931367,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0362217016518116,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024754045065492392,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33898064494132996,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40483956336975097,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.500599205493927,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03389806374907493,
"step": 190
},
{
"calibration/aurc": 0.16990665201284244,
"calibration/batch_distribution_entropy": 0.9763861379418843,
"calibration/buffer_distribution_entropy": 0.9756461996279331,
"calibration/confidence_entropy": 0.4907997092983952,
"calibration/coverage@0%": 0.04279541816108065,
"calibration/coverage@1%": 0.04279541816108065,
"calibration/coverage@10%": 0.3558397094996633,
"calibration/coverage@15%": 0.48278590372538516,
"calibration/coverage@20%": 0.6133848323851534,
"calibration/coverage@25%": 0.8433561312198542,
"calibration/coverage@30%": 0.8935762652705062,
"calibration/coverage@5%": 0.19339417144192056,
"calibration/ece": 0.1626894563658173,
"calibration/mean_confidence": 0.5491100531123216,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0057291666666666515,
"completions/max_length": 3678.0,
"completions/max_terminated_length": 3678.0,
"completions/mean_length": 815.7873168945313,
"completions/mean_terminated_length": 820.5162841796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 215.8,
"epoch": 0.46799415007312406,
"grad_norm": 0.0038923481479287148,
"learning_rate": 3.91566265060241e-07,
"loss": -0.0136,
"num_tokens": 415130718.0,
"reward": 0.993002200126648,
"reward_std": 0.11667114496231079,
"rewards/accuracy_reward": 0.6849826335906982,
"rewards/brier_reward": 0.8041805863380432,
"rewards/confidence_uniqueness_reward": 0.9449598073959351,
"rewards/format_reward": 0.9940104246139526,
"rewards/frontier_coverage_0": 0.023747061751782893,
"rewards/frontier_coverage_1": 0.023747061751782893,
"rewards/frontier_coverage_10": 0.023766111955046652,
"rewards/frontier_coverage_15": 0.027177707105875016,
"rewards/frontier_coverage_20": 0.07301479429006577,
"rewards/frontier_coverage_25": 0.1472606360912323,
"rewards/frontier_coverage_5": 0.023747061751782893,
"rewards/frontier_entropy_batch_reward": -0.26305546462535856,
"signal/accuracy_reward/centered_abs_mean": 0.14096679538488388,
"signal/accuracy_reward/group_std_mean": 0.19119617640972136,
"signal/accuracy_reward/group_zero_std_frac": 0.43055555820465086,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9744200944900513,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07048339769244194,
"signal/advantage_abs_mean": 0.7451924324035645,
"signal/advantage_pre_scale_abs_mean": 0.08635586649179458,
"signal/advantage_pre_scale_std": 0.13891534209251405,
"signal/advantage_std": 0.9830895900726319,
"signal/brier_reward/centered_abs_mean": 0.13546755462884902,
"signal/brier_reward/group_std_mean": 0.17547394037246705,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18838207721710204,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01354675628244877,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02242642156779766,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03652404025197029,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.031294023245573045,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022426421754062177,
"signal/format_reward/centered_abs_mean": 0.010574001539498567,
"signal/format_reward/group_std_mean": 0.022073457762598992,
"signal/format_reward/group_zero_std_frac": 0.9000000119209289,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07329605147242546,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0052870007697492834,
"signal/frontier_coverage_0/centered_abs_mean": 0.1862773597240448,
"signal/frontier_coverage_0/group_std_mean": 0.24120102524757386,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03693324699997902,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026637662667781115,
"signal/frontier_coverage_1/centered_abs_mean": 0.1862773597240448,
"signal/frontier_coverage_1/group_std_mean": 0.24120102524757386,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03693324699997902,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026637662667781115,
"signal/frontier_coverage_10/centered_abs_mean": 0.1861777275800705,
"signal/frontier_coverage_10/group_std_mean": 0.2410757929086685,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.036913507431745526,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026623413898050783,
"signal/frontier_coverage_15/centered_abs_mean": 0.1608198195695877,
"signal/frontier_coverage_15/group_std_mean": 0.20852963030338287,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.031917137652635576,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022997234016656877,
"signal/frontier_coverage_20/centered_abs_mean": 0.07476909160614013,
"signal/frontier_coverage_20/group_std_mean": 0.09405903220176696,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.014968187920749188,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010691980132833123,
"signal/frontier_coverage_25/centered_abs_mean": 0.1143454447388649,
"signal/frontier_coverage_25/group_std_mean": 0.145367094874382,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.022907671332359315,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00163513976149261,
"signal/frontier_coverage_5/centered_abs_mean": 0.1862773597240448,
"signal/frontier_coverage_5/group_std_mean": 0.24120102524757386,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03693324699997902,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026637662667781115,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31905388832092285,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39239723086357114,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.44702168107032775,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031905388832092284,
"step": 195
},
{
"calibration/aurc": 0.14559693191858122,
"calibration/batch_distribution_entropy": 0.9603313286311146,
"calibration/buffer_distribution_entropy": 0.9761309450014393,
"calibration/confidence_entropy": 0.5010567276644734,
"calibration/coverage@0%": 0.016820017762200986,
"calibration/coverage@1%": 0.016820017762200986,
"calibration/coverage@10%": 0.3913710631099086,
"calibration/coverage@15%": 0.6379543175606383,
"calibration/coverage@20%": 0.8828092626318101,
"calibration/coverage@25%": 0.9403797520265744,
"calibration/coverage@30%": 0.9712192254589695,
"calibration/coverage@5%": 0.14037182782463625,
"calibration/ece": 0.1773901327159586,
"calibration/mean_confidence": 0.5998158055084197,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006944444444444442,
"completions/max_length": 3106.8,
"completions/max_terminated_length": 3106.8,
"completions/mean_length": 786.4341918945313,
"completions/mean_terminated_length": 791.9395629882813,
"completions/min_length": 0.0,
"completions/min_terminated_length": 230.2,
"epoch": 0.47999400007499904,
"grad_norm": 0.0037775628734380007,
"learning_rate": 2.409638554216868e-07,
"loss": -0.0112,
"num_tokens": 427258248.0,
"reward": 0.9977959275245667,
"reward_std": 0.1135590761899948,
"rewards/accuracy_reward": 0.692968738079071,
"rewards/brier_reward": 0.8162111163139343,
"rewards/confidence_uniqueness_reward": 0.9434576988220215,
"rewards/format_reward": 0.9928819417953492,
"rewards/frontier_coverage_0": 0.03058276418596506,
"rewards/frontier_coverage_1": 0.03058276418596506,
"rewards/frontier_coverage_10": 0.030608633439987896,
"rewards/frontier_coverage_15": 0.034898260794579986,
"rewards/frontier_coverage_20": 0.0822305366396904,
"rewards/frontier_coverage_25": 0.1586170792579651,
"rewards/frontier_coverage_5": 0.03058276418596506,
"rewards/frontier_entropy_batch_reward": -0.26789160668849943,
"signal/accuracy_reward/centered_abs_mean": 0.13543294370174408,
"signal/accuracy_reward/group_std_mean": 0.1795397073030472,
"signal/accuracy_reward/group_zero_std_frac": 0.4833333432674408,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9879457116127014,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06771647185087204,
"signal/advantage_abs_mean": 0.7566372156143188,
"signal/advantage_pre_scale_abs_mean": 0.08582200407981873,
"signal/advantage_pre_scale_std": 0.13774674534797668,
"signal/advantage_std": 0.9830132961273194,
"signal/brier_reward/centered_abs_mean": 0.12440891414880753,
"signal/brier_reward/group_std_mean": 0.16094110310077667,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1832536369562149,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012440891563892364,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023261058330535888,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0364607434719801,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0340243112295866,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002326105860993266,
"signal/format_reward/centered_abs_mean": 0.011458333395421506,
"signal/format_reward/group_std_mean": 0.021999170631170274,
"signal/format_reward/group_zero_std_frac": 0.9027777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08310093134641647,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005729166697710753,
"signal/frontier_coverage_0/centered_abs_mean": 0.1727170765399933,
"signal/frontier_coverage_0/group_std_mean": 0.22679380774497987,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03630736693739891,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002469854103401303,
"signal/frontier_coverage_1/centered_abs_mean": 0.1727170765399933,
"signal/frontier_coverage_1/group_std_mean": 0.22679380774497987,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03630736693739891,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002469854103401303,
"signal/frontier_coverage_10/centered_abs_mean": 0.17263288795948029,
"signal/frontier_coverage_10/group_std_mean": 0.22668661475181578,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03628960847854614,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024686503689736127,
"signal/frontier_coverage_15/centered_abs_mean": 0.13041841089725495,
"signal/frontier_coverage_15/group_std_mean": 0.172030445933342,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.027530809864401817,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018649833044037222,
"signal/frontier_coverage_20/centered_abs_mean": 0.07613101899623871,
"signal/frontier_coverage_20/group_std_mean": 0.09484113454818725,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016059026680886747,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010886735515668988,
"signal/frontier_coverage_25/centered_abs_mean": 0.11801900565624238,
"signal/frontier_coverage_25/group_std_mean": 0.14812451601028442,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.024889787659049034,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016876716865226627,
"signal/frontier_coverage_5/centered_abs_mean": 0.1727170765399933,
"signal/frontier_coverage_5/group_std_mean": 0.22679380774497987,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03630736693739891,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002469854103401303,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31580972075462344,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38427644968032837,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.46509563326835635,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03158097080886364,
"step": 200
},
{
"epoch": 0.47999400007499904,
"eval_calibration/aurc": 0.16057820298915118,
"eval_calibration/batch_distribution_entropy": 0.914145618708674,
"eval_calibration/buffer_distribution_entropy": 0.9753044494695922,
"eval_calibration/confidence_entropy": 0.4683044711172521,
"eval_calibration/coverage@0%": 0.22395833333333334,
"eval_calibration/coverage@1%": 0.22395833333333334,
"eval_calibration/coverage@10%": 0.5677083333333334,
"eval_calibration/coverage@15%": 0.721606182795699,
"eval_calibration/coverage@20%": 0.81065188172043,
"eval_calibration/coverage@25%": 0.873991935483871,
"eval_calibration/coverage@30%": 0.947244623655914,
"eval_calibration/coverage@5%": 0.2708333333333333,
"eval_calibration/ece": 0.2455773573190524,
"eval_calibration/mean_confidence": 0.5615903357669692,
"eval_completions/clipped_ratio": 0.002604166666666685,
"eval_completions/max_length": 2298.3333333333335,
"eval_completions/max_terminated_length": 2298.3333333333335,
"eval_completions/mean_length": 798.5604349772135,
"eval_completions/mean_terminated_length": 800.6705627441406,
"eval_completions/min_length": 134.0,
"eval_completions/min_terminated_length": 276.1666666666667,
"eval_loss": 0.0,
"eval_num_tokens": 427258248.0,
"eval_reward": 0.9212540686130524,
"eval_reward_std": 0.2215366984407107,
"eval_rewards/accuracy_reward": 0.6866319378217062,
"eval_rewards/brier_reward": 0.8282056351502737,
"eval_rewards/confidence_uniqueness_reward": 0.8917240798473358,
"eval_rewards/format_reward": 0.9973958432674408,
"eval_rewards/frontier_coverage_0": 0.047293830662965775,
"eval_rewards/frontier_coverage_1": 0.047293830662965775,
"eval_rewards/frontier_coverage_10": 0.04729795269668102,
"eval_rewards/frontier_coverage_15": 0.04440750305851301,
"eval_rewards/frontier_coverage_20": 0.08812103296319644,
"eval_rewards/frontier_coverage_25": 0.16687769691149393,
"eval_rewards/frontier_coverage_5": 0.047292555992801986,
"eval_rewards/frontier_entropy_batch_reward": -0.9973958432674408,
"eval_runtime": 166.4081,
"eval_samples_per_second": 6.009,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4191080729166667,
"eval_signal/accuracy_reward/group_std_mean": 0.4639366815487544,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9535810748736063,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20955403645833334,
"eval_signal/advantage_abs_mean": 0.8791789809862772,
"eval_signal/advantage_pre_scale_abs_mean": 0.1954043780763944,
"eval_signal/advantage_pre_scale_std": 0.21952204157908758,
"eval_signal/advantage_std": 0.9863749047120413,
"eval_signal/brier_reward/centered_abs_mean": 0.16421432544787726,
"eval_signal/brier_reward/group_std_mean": 0.22416182110706964,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0745612805088361,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.016421433072537184,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04440143456061681,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06044746252397696,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020217653984824818,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004440143549193938,
"eval_signal/format_reward/centered_abs_mean": 0.0050455727614462376,
"eval_signal/format_reward/group_std_mean": 0.014731391333043575,
"eval_signal/format_reward/group_zero_std_frac": 0.9166666865348816,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011211627162992954,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0025227863807231188,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.296345055103302,
"eval_signal/frontier_coverage_0/group_std_mean": 0.4005911747614543,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01929074029127757,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004237734169388811,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.296345055103302,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4005911747614543,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01929074029127757,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004237734169388811,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.296173761288325,
"eval_signal/frontier_coverage_10/group_std_mean": 0.40038461486498517,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0192795991897583,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004235284713407357,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.17665722717841467,
"eval_signal/frontier_coverage_15/group_std_mean": 0.24827261020739874,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01150304094577829,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025261982421701155,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.10272979860504468,
"eval_signal/frontier_coverage_20/group_std_mean": 0.13188674176732698,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006687632451454799,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014690360403619707,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.19587110231320062,
"eval_signal/frontier_coverage_25/group_std_mean": 0.24194891502459845,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01274662526945273,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002800956523666779,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.29633869727452594,
"eval_signal/frontier_coverage_5/group_std_mean": 0.40058427552382153,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.019290315608183544,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004237643443048,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0050455727614462376,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.014731391333043575,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9166666865348816,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0022423254946867623,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0005045572955471774,
"eval_steps_per_second": 0.036,
"step": 200
},
{
"calibration/aurc": 0.1186967407697009,
"calibration/batch_distribution_entropy": 0.9550775718290982,
"calibration/buffer_distribution_entropy": 0.9759632840081753,
"calibration/confidence_entropy": 0.47419539528335974,
"calibration/coverage@0%": 0.026565226876090753,
"calibration/coverage@1%": 0.026565226876090753,
"calibration/coverage@10%": 0.4854984729493892,
"calibration/coverage@15%": 0.8464511950409872,
"calibration/coverage@20%": 0.9174506613422097,
"calibration/coverage@25%": 0.9550392670157068,
"calibration/coverage@30%": 0.9853403141361257,
"calibration/coverage@5%": 0.3022660340314136,
"calibration/ece": 0.18733712170138508,
"calibration/mean_confidence": 0.6067382862272922,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004947916666666652,
"completions/max_length": 3626.8,
"completions/max_terminated_length": 3626.8,
"completions/mean_length": 804.612939453125,
"completions/mean_terminated_length": 808.594140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 237.4,
"epoch": 0.491993850076874,
"grad_norm": 0.0037702268455177546,
"learning_rate": 9.036144578313253e-08,
"loss": -0.0034,
"num_tokens": 439593341.0,
"reward": 1.0242285490036012,
"reward_std": 0.11114266216754913,
"rewards/accuracy_reward": 0.7535590410232544,
"rewards/brier_reward": 0.8131016492843628,
"rewards/confidence_uniqueness_reward": 0.94401034116745,
"rewards/format_reward": 0.9950520753860473,
"rewards/frontier_coverage_0": -0.009763723891228437,
"rewards/frontier_coverage_1": -0.009763723891228437,
"rewards/frontier_coverage_10": -0.00971116297878325,
"rewards/frontier_coverage_15": 0.01904887929558754,
"rewards/frontier_coverage_20": 0.0988022267818451,
"rewards/frontier_coverage_25": 0.18960395753383635,
"rewards/frontier_coverage_5": -0.0097591457888484,
"rewards/frontier_entropy_batch_reward": -0.29627181887626647,
"signal/accuracy_reward/centered_abs_mean": 0.13992513120174407,
"signal/accuracy_reward/group_std_mean": 0.18550328016281128,
"signal/accuracy_reward/group_zero_std_frac": 0.4638888895511627,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0376464486122132,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06996256560087204,
"signal/advantage_abs_mean": 0.7619848847389221,
"signal/advantage_pre_scale_abs_mean": 0.08387549370527267,
"signal/advantage_pre_scale_std": 0.1350790113210678,
"signal/advantage_std": 0.9830043315887451,
"signal/brier_reward/centered_abs_mean": 0.129868845641613,
"signal/brier_reward/group_std_mean": 0.1658725470304489,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1923790842294693,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012986884266138077,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02169004678726196,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03326268345117569,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03214513845741749,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002169004688039422,
"signal/format_reward/centered_abs_mean": 0.008241102285683154,
"signal/format_reward/group_std_mean": 0.01677692960947752,
"signal/format_reward/group_zero_std_frac": 0.9250000238418579,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06075965389609337,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004120551142841577,
"signal/frontier_coverage_0/centered_abs_mean": 0.18534817099571227,
"signal/frontier_coverage_0/group_std_mean": 0.23780874013900757,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.039334161579608916,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026504788547754288,
"signal/frontier_coverage_1/centered_abs_mean": 0.18534817099571227,
"signal/frontier_coverage_1/group_std_mean": 0.23780874013900757,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.039334161579608916,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026504788547754288,
"signal/frontier_coverage_10/centered_abs_mean": 0.1850076824426651,
"signal/frontier_coverage_10/group_std_mean": 0.23738227784633636,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.039263205230236055,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026456098072230815,
"signal/frontier_coverage_15/centered_abs_mean": 0.09948588758707047,
"signal/frontier_coverage_15/group_std_mean": 0.12873928546905516,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.021109068393707277,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014226482482627035,
"signal/frontier_coverage_20/centered_abs_mean": 0.08255493640899658,
"signal/frontier_coverage_20/group_std_mean": 0.10234281718730927,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01751541830599308,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011805356247350574,
"signal/frontier_coverage_25/centered_abs_mean": 0.12076869606971741,
"signal/frontier_coverage_25/group_std_mean": 0.15139889121055602,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.025616540387272835,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017269923351705073,
"signal/frontier_coverage_5/centered_abs_mean": 0.18533942103385925,
"signal/frontier_coverage_5/group_std_mean": 0.2377980649471283,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03933229818940163,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002650353778153658,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3371506452560425,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40471735000610354,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5003003001213073,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03371506631374359,
"step": 205
},
{
"calibration/aurc": 0.1272282988853613,
"calibration/batch_distribution_entropy": 0.9597042176053079,
"calibration/buffer_distribution_entropy": 0.9764644981690312,
"calibration/confidence_entropy": 0.4988459667423297,
"calibration/coverage@0%": 0.03220648332796521,
"calibration/coverage@1%": 0.03220648332796521,
"calibration/coverage@10%": 0.42679129860725634,
"calibration/coverage@15%": 0.716446344990166,
"calibration/coverage@20%": 0.8498751624715318,
"calibration/coverage@25%": 0.9440389794617708,
"calibration/coverage@30%": 0.9772528433945756,
"calibration/coverage@5%": 0.08715098602230857,
"calibration/ece": 0.13103508682212492,
"calibration/mean_confidence": 0.6140400005582972,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0028935185185185266,
"completions/max_length": 3365.0,
"completions/max_terminated_length": 3365.0,
"completions/mean_length": 805.1265869140625,
"completions/mean_terminated_length": 807.4395955403646,
"completions/min_length": 0.0,
"completions/min_terminated_length": 198.0,
"epoch": 0.49919376007799904,
"num_tokens": 447024424.0,
"reward": 1.0055522521336873,
"reward_std": 0.10900399088859558,
"rewards/accuracy_reward": 0.7063078681627909,
"rewards/brier_reward": 0.8075371583302816,
"rewards/confidence_uniqueness_reward": 0.9471040964126587,
"rewards/format_reward": 0.9971064925193787,
"rewards/frontier_coverage_0": 0.008651394241799911,
"rewards/frontier_coverage_1": 0.008651394241799911,
"rewards/frontier_coverage_10": 0.008720822011431059,
"rewards/frontier_coverage_15": 0.03024888038635254,
"rewards/frontier_coverage_20": 0.09493551154931386,
"rewards/frontier_coverage_25": 0.173322523633639,
"rewards/frontier_coverage_5": 0.008652187573413054,
"rewards/frontier_entropy_batch_reward": -0.2638363142808278,
"signal/accuracy_reward/centered_abs_mean": 0.1340241606036822,
"signal/accuracy_reward/group_std_mean": 0.18462320665518442,
"signal/accuracy_reward/group_zero_std_frac": 0.4583333333333333,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0006801684697468,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0670120803018411,
"signal/advantage_abs_mean": 0.7406850457191467,
"signal/advantage_pre_scale_abs_mean": 0.07968033105134964,
"signal/advantage_pre_scale_std": 0.12958685557047525,
"signal/advantage_std": 0.9829863905906677,
"signal/brier_reward/centered_abs_mean": 0.12960121283928552,
"signal/brier_reward/group_std_mean": 0.16745843489964804,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19411064187685648,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012960121346016725,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01816164267559846,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02970569891234239,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027298261721928913,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001816164197710653,
"signal/format_reward/centered_abs_mean": 0.0055157696673025685,
"signal/format_reward/group_std_mean": 0.014287550002336502,
"signal/format_reward/group_zero_std_frac": 0.9259259502092997,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.04154850294192632,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0027578848336512842,
"signal/frontier_coverage_0/centered_abs_mean": 0.17834581434726715,
"signal/frontier_coverage_0/group_std_mean": 0.23521957298119864,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.038279421627521515,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00255034522463878,
"signal/frontier_coverage_1/centered_abs_mean": 0.17834581434726715,
"signal/frontier_coverage_1/group_std_mean": 0.23521957298119864,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.038279421627521515,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00255034522463878,
"signal/frontier_coverage_10/centered_abs_mean": 0.17744634052117667,
"signal/frontier_coverage_10/group_std_mean": 0.23404847085475922,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.038087598979473114,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025374825733403363,
"signal/frontier_coverage_15/centered_abs_mean": 0.07600981990496318,
"signal/frontier_coverage_15/group_std_mean": 0.099979134897391,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016342710082729656,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010869404068216681,
"signal/frontier_coverage_20/centered_abs_mean": 0.08222619444131851,
"signal/frontier_coverage_20/group_std_mean": 0.10303841282924016,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01762464890877406,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011758345644921064,
"signal/frontier_coverage_25/centered_abs_mean": 0.12105090419451396,
"signal/frontier_coverage_25/group_std_mean": 0.15320136646429697,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02592242571214835,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017310279266287882,
"signal/frontier_coverage_5/centered_abs_mean": 0.17833813031514487,
"signal/frontier_coverage_5/group_std_mean": 0.23520942529042563,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0382777601480484,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002550235173354546,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3061721622943878,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37579457958539325,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4601670801639557,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03061721660196781,
"step": 208,
"total_flos": 0.0,
"train_loss": -0.022534527126341485,
"train_runtime": 39191.2693,
"train_samples_per_second": 0.383,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 208,
"num_input_tokens_seen": 447024424,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}