Files
RLCR-v4-ks-uniqueness-sft-math/trainer_state.json
ModelHub XC dfb16b2797 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-uniqueness-sft-math
Source: Original Platform
2026-06-15 09:14:18 +08:00

5721 lines
355 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.49919376007799904,
"eval_steps": 50,
"global_step": 208,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.36967823482088125,
"calibration/batch_distribution_entropy": 0.7862026048473938,
"calibration/confidence_entropy": 0.29413324102623034,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.08602150537634409,
"calibration/coverage@20%": 0.11326164874551972,
"calibration/coverage@25%": 0.13189964157706094,
"calibration/coverage@30%": 0.16774193548387098,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.24784627572046558,
"calibration/mean_confidence": 0.6899520713084326,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.043923611111111115,
"completions/max_length": 4052.0,
"completions/max_terminated_length": 4052.0,
"completions/mean_length": 1002.0799560546875,
"completions/mean_terminated_length": 1048.1506591796874,
"completions/min_length": 0.0,
"completions/min_terminated_length": 215.0,
"epoch": 0.011999850001874977,
"grad_norm": 0.0019047551322728395,
"learning_rate": 5.952380952380953e-07,
"loss": -0.0216,
"num_tokens": 14658169.0,
"reward": 0.7684701919555664,
"reward_std": 0.5013577401638031,
"rewards/accuracy_reward": 0.34930555820465087,
"rewards/brier_reward": 0.49733580350875856,
"rewards/confidence_uniqueness_reward": 0.49042509198188783,
"rewards/format_reward": 0.7189236164093018,
"rewards/frontier_aurc_reward": 0.4430105030536652,
"rewards/frontier_coverage_1": 0.4430105030536652,
"rewards/frontier_coverage_10": 0.4430105030536652,
"rewards/frontier_coverage_15": 0.4430105030536652,
"rewards/frontier_coverage_20": 0.4430105030536652,
"rewards/frontier_coverage_25": 0.4430105030536652,
"rewards/frontier_coverage_5": 0.4430105030536652,
"rewards/frontier_ece_reward": 0.4430105030536652,
"signal/accuracy_reward/centered_abs_mean": 0.3296006917953491,
"signal/accuracy_reward/group_std_mean": 0.38530040979385377,
"signal/accuracy_reward/group_zero_std_frac": 0.07500000223517418,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16480034589767456,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.16480034589767456,
"signal/advantage_abs_mean": 0.42119367718696593,
"signal/advantage_pre_scale_abs_mean": 0.42119367718696593,
"signal/advantage_pre_scale_std": 0.502716064453125,
"signal/advantage_std": 0.502716064453125,
"signal/brier_reward/centered_abs_mean": 0.3650480568408966,
"signal/brier_reward/group_std_mean": 0.4109442591667175,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04563100710511207,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.04563100710511207,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.33325800895690916,
"signal/confidence_uniqueness_reward/group_std_mean": 0.37558268308639525,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.041657251119613645,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.041657251119613645,
"signal/format_reward/centered_abs_mean": 0.3422960102558136,
"signal/format_reward/group_std_mean": 0.41390294432640073,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1711480051279068,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.1711480051279068,
"signal/frontier_aurc_reward/centered_abs_mean": 0.36038182973861693,
"signal/frontier_aurc_reward/group_std_mean": 0.4062395691871643,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.006450834404677153,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.006450834404677153,
"signal/frontier_coverage_1/centered_abs_mean": 0.36038182973861693,
"signal/frontier_coverage_1/group_std_mean": 0.4062395691871643,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006450834404677153,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006450834404677153,
"signal/frontier_coverage_10/centered_abs_mean": 0.36038182973861693,
"signal/frontier_coverage_10/group_std_mean": 0.4062395691871643,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006450834404677153,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006450834404677153,
"signal/frontier_coverage_15/centered_abs_mean": 0.36038182973861693,
"signal/frontier_coverage_15/group_std_mean": 0.4062395691871643,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006450834404677153,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006450834404677153,
"signal/frontier_coverage_20/centered_abs_mean": 0.36038182973861693,
"signal/frontier_coverage_20/group_std_mean": 0.4062395691871643,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006450834404677153,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006450834404677153,
"signal/frontier_coverage_25/centered_abs_mean": 0.36038182973861693,
"signal/frontier_coverage_25/group_std_mean": 0.4062395691871643,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006450834404677153,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006450834404677153,
"signal/frontier_coverage_5/centered_abs_mean": 0.36038182973861693,
"signal/frontier_coverage_5/group_std_mean": 0.4062395691871643,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006450834404677153,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006450834404677153,
"signal/frontier_ece_reward/centered_abs_mean": 0.36038182973861693,
"signal/frontier_ece_reward/group_std_mean": 0.4062395691871643,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.04504772871732712,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.04504772871732712,
"step": 5
},
{
"calibration/aurc": 0.32589507602858425,
"calibration/batch_distribution_entropy": 0.7834614298983764,
"calibration/confidence_entropy": 0.3043506317779947,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.06793650793650793,
"calibration/coverage@20%": 0.11428571428571428,
"calibration/coverage@25%": 0.3277179106936893,
"calibration/coverage@30%": 0.4927966937433446,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.24205344081355856,
"calibration/mean_confidence": 0.6932854483595139,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.04166666666666667,
"completions/max_length": 3819.0,
"completions/max_terminated_length": 3819.0,
"completions/mean_length": 964.8975708007813,
"completions/mean_terminated_length": 1007.06064453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 243.4,
"epoch": 0.023999700003749954,
"grad_norm": 0.0010482864454388618,
"learning_rate": 1.1904761904761906e-06,
"loss": -0.0304,
"num_tokens": 28856509.0,
"reward": 0.8811465501785278,
"reward_std": 0.44658924341201783,
"rewards/accuracy_reward": 0.4013020873069763,
"rewards/brier_reward": 0.5648763060569764,
"rewards/confidence_uniqueness_reward": 0.5875549912452698,
"rewards/format_reward": 0.8222222208976746,
"rewards/frontier_aurc_reward": 0.5007211208343506,
"rewards/frontier_coverage_1": 0.5007211208343506,
"rewards/frontier_coverage_10": 0.5007211208343506,
"rewards/frontier_coverage_15": 0.5007211208343506,
"rewards/frontier_coverage_20": 0.5007211208343506,
"rewards/frontier_coverage_25": 0.5007211208343506,
"rewards/frontier_coverage_5": 0.5007211208343506,
"rewards/frontier_ece_reward": 0.5007211208343506,
"signal/accuracy_reward/centered_abs_mean": 0.3191786050796509,
"signal/accuracy_reward/group_std_mean": 0.3830325841903687,
"signal/accuracy_reward/group_zero_std_frac": 0.05555555615574122,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15958930253982545,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15958930253982545,
"signal/advantage_abs_mean": 0.35644559264183046,
"signal/advantage_pre_scale_abs_mean": 0.35644559264183046,
"signal/advantage_pre_scale_std": 0.4495138943195343,
"signal/advantage_std": 0.4495138943195343,
"signal/brier_reward/centered_abs_mean": 0.3444161355495453,
"signal/brier_reward/group_std_mean": 0.39449760913848875,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.043052016943693164,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.043052016943693164,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.3033804833889008,
"signal/confidence_uniqueness_reward/group_std_mean": 0.34619358777999876,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0379225604236126,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0379225604236126,
"signal/format_reward/centered_abs_mean": 0.2525716066360474,
"signal/format_reward/group_std_mean": 0.3450364053249359,
"signal/format_reward/group_zero_std_frac": 0.03333333358168602,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1262858033180237,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.1262858033180237,
"signal/frontier_aurc_reward/centered_abs_mean": 0.35025461912155154,
"signal/frontier_aurc_reward/group_std_mean": 0.39719846844673157,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0062695578671991825,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0062695578671991825,
"signal/frontier_coverage_1/centered_abs_mean": 0.35025461912155154,
"signal/frontier_coverage_1/group_std_mean": 0.39719846844673157,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0062695578671991825,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0062695578671991825,
"signal/frontier_coverage_10/centered_abs_mean": 0.35025461912155154,
"signal/frontier_coverage_10/group_std_mean": 0.39719846844673157,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0062695578671991825,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0062695578671991825,
"signal/frontier_coverage_15/centered_abs_mean": 0.35025461912155154,
"signal/frontier_coverage_15/group_std_mean": 0.39719846844673157,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0062695578671991825,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0062695578671991825,
"signal/frontier_coverage_20/centered_abs_mean": 0.35025461912155154,
"signal/frontier_coverage_20/group_std_mean": 0.39719846844673157,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0062695578671991825,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0062695578671991825,
"signal/frontier_coverage_25/centered_abs_mean": 0.35025461912155154,
"signal/frontier_coverage_25/group_std_mean": 0.39719846844673157,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0062695578671991825,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0062695578671991825,
"signal/frontier_coverage_5/centered_abs_mean": 0.35025461912155154,
"signal/frontier_coverage_5/group_std_mean": 0.39719846844673157,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0062695578671991825,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0062695578671991825,
"signal/frontier_ece_reward/centered_abs_mean": 0.35025461912155154,
"signal/frontier_ece_reward/group_std_mean": 0.39719846844673157,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.04378182739019394,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.04378182739019394,
"step": 10
},
{
"calibration/aurc": 0.381261368050961,
"calibration/batch_distribution_entropy": 0.8395895245268612,
"calibration/confidence_entropy": 0.3122351741378401,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.06910112359550562,
"calibration/coverage@20%": 0.1,
"calibration/coverage@25%": 0.1846394737670349,
"calibration/coverage@30%": 0.31916833953903984,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.23610914584010007,
"calibration/mean_confidence": 0.6068558516881933,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.04366319444444444,
"completions/max_length": 4003.8,
"completions/max_terminated_length": 4003.8,
"completions/mean_length": 960.4666748046875,
"completions/mean_terminated_length": 1004.5089233398437,
"completions/min_length": 0.0,
"completions/min_terminated_length": 264.4,
"epoch": 0.03599955000562493,
"grad_norm": 0.0007612162153236568,
"learning_rate": 1.7857142857142859e-06,
"loss": -0.0424,
"num_tokens": 43023069.0,
"reward": 0.9628929138183594,
"reward_std": 0.3616787016391754,
"rewards/accuracy_reward": 0.39479166865348814,
"rewards/brier_reward": 0.6317438364028931,
"rewards/confidence_uniqueness_reward": 0.7069078326225281,
"rewards/format_reward": 0.9173611164093017,
"rewards/frontier_aurc_reward": 0.5572714924812316,
"rewards/frontier_coverage_1": 0.5572714924812316,
"rewards/frontier_coverage_10": 0.5572714924812316,
"rewards/frontier_coverage_15": 0.5572714924812316,
"rewards/frontier_coverage_20": 0.5572714924812316,
"rewards/frontier_coverage_25": 0.5572714924812316,
"rewards/frontier_coverage_5": 0.5572714924812316,
"rewards/frontier_ece_reward": 0.5572714924812316,
"signal/accuracy_reward/centered_abs_mean": 0.3097439229488373,
"signal/accuracy_reward/group_std_mean": 0.3701206386089325,
"signal/accuracy_reward/group_zero_std_frac": 0.09444444626569748,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15487196147441865,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15487196147441865,
"signal/advantage_abs_mean": 0.27835277616977694,
"signal/advantage_pre_scale_abs_mean": 0.27835277616977694,
"signal/advantage_pre_scale_std": 0.36725740432739257,
"signal/advantage_std": 0.36725740432739257,
"signal/brier_reward/centered_abs_mean": 0.3213726282119751,
"signal/brier_reward/group_std_mean": 0.3753271162509918,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04017157852649689,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.04017157852649689,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.21762884259223939,
"signal/confidence_uniqueness_reward/group_std_mean": 0.27037290930747987,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027203605324029923,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.027203605324029923,
"signal/format_reward/centered_abs_mean": 0.13908420354127884,
"signal/format_reward/group_std_mean": 0.23201583325862885,
"signal/format_reward/group_zero_std_frac": 0.1833333358168602,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06954210177063942,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.06954210177063942,
"signal/frontier_aurc_reward/centered_abs_mean": 0.3441716134548187,
"signal/frontier_aurc_reward/group_std_mean": 0.3906739056110382,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.006160671729594469,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.006160671729594469,
"signal/frontier_coverage_1/centered_abs_mean": 0.3441716134548187,
"signal/frontier_coverage_1/group_std_mean": 0.3906739056110382,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006160671729594469,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006160671729594469,
"signal/frontier_coverage_10/centered_abs_mean": 0.3441716134548187,
"signal/frontier_coverage_10/group_std_mean": 0.3906739056110382,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006160671729594469,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006160671729594469,
"signal/frontier_coverage_15/centered_abs_mean": 0.3441716134548187,
"signal/frontier_coverage_15/group_std_mean": 0.3906739056110382,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006160671729594469,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006160671729594469,
"signal/frontier_coverage_20/centered_abs_mean": 0.3441716134548187,
"signal/frontier_coverage_20/group_std_mean": 0.3906739056110382,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006160671729594469,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006160671729594469,
"signal/frontier_coverage_25/centered_abs_mean": 0.3441716134548187,
"signal/frontier_coverage_25/group_std_mean": 0.3906739056110382,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006160671729594469,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006160671729594469,
"signal/frontier_coverage_5/centered_abs_mean": 0.3441716134548187,
"signal/frontier_coverage_5/group_std_mean": 0.3906739056110382,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006160671729594469,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006160671729594469,
"signal/frontier_ece_reward/centered_abs_mean": 0.3441716134548187,
"signal/frontier_ece_reward/group_std_mean": 0.3906739056110382,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.04302145168185234,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.04302145168185234,
"step": 15
},
{
"calibration/aurc": 0.4027054789391406,
"calibration/batch_distribution_entropy": 0.9028449005084616,
"calibration/buffer_distribution_entropy": 0.8350432189471256,
"calibration/confidence_entropy": 0.3532060861660855,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.05249343832020997,
"calibration/coverage@20%": 0.17029932428659184,
"calibration/coverage@25%": 0.2729503870355258,
"calibration/coverage@30%": 0.333190300423154,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.22552573548401694,
"calibration/mean_confidence": 0.49134779142387586,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03498263888888886,
"completions/max_length": 3970.4,
"completions/max_terminated_length": 3970.4,
"completions/mean_length": 953.0729248046875,
"completions/mean_terminated_length": 987.8788208007812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 258.4,
"epoch": 0.04799940000749991,
"grad_norm": 0.0005136204999871552,
"learning_rate": 2.380952380952381e-06,
"loss": -0.0324,
"num_tokens": 57116165.0,
"reward": 0.9295851826667786,
"reward_std": 0.23658435344696044,
"rewards/accuracy_reward": 0.4424479126930237,
"rewards/brier_reward": 0.6920093059539795,
"rewards/confidence_uniqueness_reward": 0.7875280380249023,
"rewards/format_reward": 0.96015625,
"rewards/frontier_aurc_reward": 0.11949877790175378,
"rewards/frontier_coverage_1": 0.22551958113908768,
"rewards/frontier_coverage_10": 0.22551958113908768,
"rewards/frontier_coverage_15": 0.22551958113908768,
"rewards/frontier_coverage_20": 0.22551958113908768,
"rewards/frontier_coverage_25": 0.22551958113908768,
"rewards/frontier_coverage_5": 0.22551958113908768,
"rewards/frontier_ece_reward": 0.1358485657721758,
"signal/accuracy_reward/centered_abs_mean": 0.2820800840854645,
"signal/accuracy_reward/group_std_mean": 0.34979713559150694,
"signal/accuracy_reward/group_zero_std_frac": 0.10000000149011612,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14104004204273224,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14104004204273224,
"signal/advantage_abs_mean": 0.1771962672472,
"signal/advantage_pre_scale_abs_mean": 0.1771962672472,
"signal/advantage_pre_scale_std": 0.24651205241680146,
"signal/advantage_std": 0.24651205241680146,
"signal/brier_reward/centered_abs_mean": 0.2946962535381317,
"signal/brier_reward/group_std_mean": 0.350597482919693,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.036837031692266466,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.036837031692266466,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.15454766154289246,
"signal/confidence_uniqueness_reward/group_std_mean": 0.19427459239959716,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019318457692861557,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.019318457692861557,
"signal/format_reward/centered_abs_mean": 0.06775716096162795,
"signal/format_reward/group_std_mean": 0.12582100331783294,
"signal/format_reward/group_zero_std_frac": 0.4916666805744171,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.03387858048081398,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.03387858048081398,
"signal/frontier_aurc_reward/centered_abs_mean": 0.07141957976855337,
"signal/frontier_aurc_reward/group_std_mean": 0.08215194744989276,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0012784103186277208,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0012784103186277208,
"signal/frontier_coverage_1/centered_abs_mean": 0.3562255322933197,
"signal/frontier_coverage_1/group_std_mean": 0.45040944814682005,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006376436538994312,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006376436538994312,
"signal/frontier_coverage_10/centered_abs_mean": 0.3562255322933197,
"signal/frontier_coverage_10/group_std_mean": 0.45040944814682005,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006376436538994312,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006376436538994312,
"signal/frontier_coverage_15/centered_abs_mean": 0.3562255322933197,
"signal/frontier_coverage_15/group_std_mean": 0.45040944814682005,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006376436538994312,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006376436538994312,
"signal/frontier_coverage_20/centered_abs_mean": 0.3562255322933197,
"signal/frontier_coverage_20/group_std_mean": 0.45040944814682005,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006376436538994312,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006376436538994312,
"signal/frontier_coverage_25/centered_abs_mean": 0.3562255322933197,
"signal/frontier_coverage_25/group_std_mean": 0.45040944814682005,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006376436538994312,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006376436538994312,
"signal/frontier_coverage_5/centered_abs_mean": 0.3562255322933197,
"signal/frontier_coverage_5/group_std_mean": 0.45040944814682005,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006376436538994312,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006376436538994312,
"signal/frontier_ece_reward/centered_abs_mean": 0.1151403695344925,
"signal/frontier_ece_reward/group_std_mean": 0.13498959243297576,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.014392546191811562,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.014392546191811562,
"step": 20
},
{
"calibration/aurc": 0.3614334508581706,
"calibration/batch_distribution_entropy": 0.9248183320981849,
"calibration/buffer_distribution_entropy": 0.873212498255721,
"calibration/confidence_entropy": 0.4128546769676536,
"calibration/coverage@0%": 0.014285714285714285,
"calibration/coverage@1%": 0.014285714285714285,
"calibration/coverage@10%": 0.09585753000387147,
"calibration/coverage@15%": 0.14609627048651438,
"calibration/coverage@20%": 0.1788488837269325,
"calibration/coverage@25%": 0.24317976513098466,
"calibration/coverage@30%": 0.37461607949412823,
"calibration/coverage@5%": 0.024867724867724865,
"calibration/ece": 0.24605077384370877,
"calibration/mean_confidence": 0.38649038439891786,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02725694444444442,
"completions/max_length": 3480.2,
"completions/max_terminated_length": 3480.2,
"completions/mean_length": 929.60078125,
"completions/mean_terminated_length": 955.7419311523438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 270.0,
"epoch": 0.05999925000937488,
"grad_norm": 0.00046045094495639205,
"learning_rate": 2.9761904761904763e-06,
"loss": -0.025,
"num_tokens": 70949614.0,
"reward": 0.9276612401008606,
"reward_std": 0.19562409222126007,
"rewards/accuracy_reward": 0.47786458134651183,
"rewards/brier_reward": 0.6938265442848206,
"rewards/confidence_uniqueness_reward": 0.8463589429855347,
"rewards/format_reward": 0.9715277791023255,
"rewards/frontier_aurc_reward": -0.003473227610811591,
"rewards/frontier_coverage_1": 0.08209572061896324,
"rewards/frontier_coverage_10": 0.08209572061896324,
"rewards/frontier_coverage_15": 0.08209572061896324,
"rewards/frontier_coverage_20": 0.08209572061896324,
"rewards/frontier_coverage_25": 0.08209572061896324,
"rewards/frontier_coverage_5": 0.08209572061896324,
"rewards/frontier_ece_reward": 0.013495722971856593,
"signal/accuracy_reward/centered_abs_mean": 0.293115234375,
"signal/accuracy_reward/group_std_mean": 0.36036287546157836,
"signal/accuracy_reward/group_zero_std_frac": 0.07777778003364802,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1465576171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1465576171875,
"signal/advantage_abs_mean": 0.14612423181533812,
"signal/advantage_pre_scale_abs_mean": 0.14612423181533812,
"signal/advantage_pre_scale_std": 0.2055502027273178,
"signal/advantage_std": 0.2055502027273178,
"signal/brier_reward/centered_abs_mean": 0.27127314209938047,
"signal/brier_reward/group_std_mean": 0.32418668270111084,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03390914276242256,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.03390914276242256,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09788585007190705,
"signal/confidence_uniqueness_reward/group_std_mean": 0.13583073616027833,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01223573125898838,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01223573125898838,
"signal/format_reward/centered_abs_mean": 0.04701605886220932,
"signal/format_reward/group_std_mean": 0.08671137690544128,
"signal/format_reward/group_zero_std_frac": 0.6472222447395325,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.02350802943110466,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.02350802943110466,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002270214632153511,
"signal/frontier_aurc_reward/group_std_mean": 0.0033901261631399393,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0636840276420115e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0636840276420115e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.39322843551635744,
"signal/frontier_coverage_1/group_std_mean": 0.48915119767189025,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.007038788590580225,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.007038788590580225,
"signal/frontier_coverage_10/centered_abs_mean": 0.39322843551635744,
"signal/frontier_coverage_10/group_std_mean": 0.48915119767189025,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.007038788590580225,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.007038788590580225,
"signal/frontier_coverage_15/centered_abs_mean": 0.39322843551635744,
"signal/frontier_coverage_15/group_std_mean": 0.48915119767189025,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007038788590580225,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.007038788590580225,
"signal/frontier_coverage_20/centered_abs_mean": 0.39322843551635744,
"signal/frontier_coverage_20/group_std_mean": 0.48915119767189025,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.007038788590580225,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.007038788590580225,
"signal/frontier_coverage_25/centered_abs_mean": 0.39322843551635744,
"signal/frontier_coverage_25/group_std_mean": 0.48915119767189025,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.007038788590580225,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.007038788590580225,
"signal/frontier_coverage_5/centered_abs_mean": 0.39322843551635744,
"signal/frontier_coverage_5/group_std_mean": 0.48915119767189025,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.007038788590580225,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.007038788590580225,
"signal/frontier_ece_reward/centered_abs_mean": 0.04797838628292084,
"signal/frontier_ece_reward/group_std_mean": 0.061146382987499234,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005997298285365105,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005997298285365105,
"step": 25
},
{
"calibration/aurc": 0.2923284819883589,
"calibration/batch_distribution_entropy": 0.9250302282912619,
"calibration/buffer_distribution_entropy": 0.9063994573641947,
"calibration/confidence_entropy": 0.4841688589725296,
"calibration/coverage@0%": 0.008497994354479274,
"calibration/coverage@1%": 0.008497994354479274,
"calibration/coverage@10%": 0.05956929646528768,
"calibration/coverage@15%": 0.10252254939375434,
"calibration/coverage@20%": 0.19076890769300306,
"calibration/coverage@25%": 0.3561873535093358,
"calibration/coverage@30%": 0.5256556345399412,
"calibration/coverage@5%": 0.008497994354479274,
"calibration/ece": 0.21817575367042902,
"calibration/mean_confidence": 0.3925755252888396,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.025520833333333305,
"completions/max_length": 3789.4,
"completions/max_terminated_length": 3789.4,
"completions/mean_length": 903.4365356445312,
"completions/mean_terminated_length": 927.2119506835937,
"completions/min_length": 0.0,
"completions/min_terminated_length": 270.6,
"epoch": 0.07199910001124986,
"grad_norm": 0.0004290464275982231,
"learning_rate": 3.5714285714285718e-06,
"loss": -0.0226,
"num_tokens": 84467123.0,
"reward": 0.9338800191879273,
"reward_std": 0.1860422283411026,
"rewards/accuracy_reward": 0.4858506917953491,
"rewards/brier_reward": 0.7087279558181763,
"rewards/confidence_uniqueness_reward": 0.8569933891296386,
"rewards/format_reward": 0.9730902910232544,
"rewards/frontier_aurc_reward": -0.0031799635384231807,
"rewards/frontier_coverage_1": 0.06800358705222606,
"rewards/frontier_coverage_10": 0.06800358705222606,
"rewards/frontier_coverage_15": 0.06800358705222606,
"rewards/frontier_coverage_20": 0.06800358705222606,
"rewards/frontier_coverage_25": 0.06800358705222606,
"rewards/frontier_coverage_5": 0.06800358705222606,
"rewards/frontier_ece_reward": 0.01158127374947071,
"signal/accuracy_reward/centered_abs_mean": 0.2845431864261627,
"signal/accuracy_reward/group_std_mean": 0.35211129784584044,
"signal/accuracy_reward/group_zero_std_frac": 0.08888889104127884,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14227159321308136,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14227159321308136,
"signal/advantage_abs_mean": 0.13841767907142638,
"signal/advantage_pre_scale_abs_mean": 0.13841767907142638,
"signal/advantage_pre_scale_std": 0.19867367446422576,
"signal/advantage_std": 0.19867367446422576,
"signal/brier_reward/centered_abs_mean": 0.2366869866847992,
"signal/brier_reward/group_std_mean": 0.28489047288894653,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0295858733355999,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0295858733355999,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09566812962293625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.13221579790115356,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011958516202867031,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011958516202867031,
"signal/format_reward/centered_abs_mean": 0.0448133684694767,
"signal/format_reward/group_std_mean": 0.0826771542429924,
"signal/format_reward/group_zero_std_frac": 0.6638888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.02240668423473835,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.02240668423473835,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001989086694084108,
"signal/frontier_aurc_reward/group_std_mean": 0.0028138306923210623,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.560464974725619e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.560464974725619e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.3589896261692047,
"signal/frontier_coverage_1/group_std_mean": 0.4434766948223114,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006425914168357849,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006425914168357849,
"signal/frontier_coverage_10/centered_abs_mean": 0.3589896261692047,
"signal/frontier_coverage_10/group_std_mean": 0.4434766948223114,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006425914168357849,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006425914168357849,
"signal/frontier_coverage_15/centered_abs_mean": 0.3589896261692047,
"signal/frontier_coverage_15/group_std_mean": 0.4434766948223114,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006425914168357849,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006425914168357849,
"signal/frontier_coverage_20/centered_abs_mean": 0.3589896261692047,
"signal/frontier_coverage_20/group_std_mean": 0.4434766948223114,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006425914168357849,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006425914168357849,
"signal/frontier_coverage_25/centered_abs_mean": 0.3589896261692047,
"signal/frontier_coverage_25/group_std_mean": 0.4434766948223114,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006425914168357849,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006425914168357849,
"signal/frontier_coverage_5/centered_abs_mean": 0.3589896261692047,
"signal/frontier_coverage_5/group_std_mean": 0.4434766948223114,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006425914168357849,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006425914168357849,
"signal/frontier_ece_reward/centered_abs_mean": 0.03801303133368492,
"signal/frontier_ece_reward/group_std_mean": 0.04951959177851677,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004751628916710615,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004751628916710615,
"step": 30
},
{
"calibration/aurc": 0.3133094829157582,
"calibration/batch_distribution_entropy": 0.9464168853922367,
"calibration/buffer_distribution_entropy": 0.9315091404755963,
"calibration/confidence_entropy": 0.5289805554461637,
"calibration/coverage@0%": 0.00853084623941131,
"calibration/coverage@1%": 0.00853084623941131,
"calibration/coverage@10%": 0.06184527374738721,
"calibration/coverage@15%": 0.11876790247201814,
"calibration/coverage@20%": 0.22484751066508574,
"calibration/coverage@25%": 0.31518733588180836,
"calibration/coverage@30%": 0.4264904932361337,
"calibration/coverage@5%": 0.020896437637260772,
"calibration/ece": 0.16186389460902642,
"calibration/mean_confidence": 0.47679469896631954,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02578125,
"completions/max_length": 3835.6,
"completions/max_terminated_length": 3835.6,
"completions/mean_length": 876.8658081054688,
"completions/mean_terminated_length": 900.225390625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 299.4,
"epoch": 0.08399895001312484,
"grad_norm": 0.0005008935695514083,
"learning_rate": 4.166666666666667e-06,
"loss": -0.0211,
"num_tokens": 97646057.0,
"reward": 0.9478654384613037,
"reward_std": 0.19103043675422668,
"rewards/accuracy_reward": 0.5026041567325592,
"rewards/brier_reward": 0.7327090382575989,
"rewards/confidence_uniqueness_reward": 0.8808719396591187,
"rewards/format_reward": 0.9735243082046509,
"rewards/frontier_aurc_reward": -0.003296623891219497,
"rewards/frontier_coverage_1": 0.06155742183327675,
"rewards/frontier_coverage_10": 0.06155742183327675,
"rewards/frontier_coverage_15": 0.06155742183327675,
"rewards/frontier_coverage_20": 0.06155742183327675,
"rewards/frontier_coverage_25": 0.06155742183327675,
"rewards/frontier_coverage_5": 0.06155742183327675,
"rewards/frontier_ece_reward": 0.012410728633403778,
"signal/accuracy_reward/centered_abs_mean": 0.2734483480453491,
"signal/accuracy_reward/group_std_mean": 0.3328505277633667,
"signal/accuracy_reward/group_zero_std_frac": 0.1583333358168602,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13672417402267456,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.13672417402267456,
"signal/advantage_abs_mean": 0.14573031067848205,
"signal/advantage_pre_scale_abs_mean": 0.14573031067848205,
"signal/advantage_pre_scale_std": 0.20383856296539307,
"signal/advantage_std": 0.20383856296539307,
"signal/brier_reward/centered_abs_mean": 0.2111766368150711,
"signal/brier_reward/group_std_mean": 0.2590035915374756,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026397079601883887,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.026397079601883887,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08130226284265518,
"signal/confidence_uniqueness_reward/group_std_mean": 0.11520479023456573,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010162782855331898,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010162782855331898,
"signal/format_reward/centered_abs_mean": 0.04118381068110466,
"signal/format_reward/group_std_mean": 0.07573343813419342,
"signal/format_reward/group_zero_std_frac": 0.6944444417953491,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.02059190534055233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.02059190534055233,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002494776528328657,
"signal/frontier_aurc_reward/group_std_mean": 0.0035075797699391843,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.465649835765362e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.465649835765362e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.28815833330154417,
"signal/frontier_coverage_1/group_std_mean": 0.3588971734046936,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005158033780753612,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005158033780753612,
"signal/frontier_coverage_10/centered_abs_mean": 0.28815833330154417,
"signal/frontier_coverage_10/group_std_mean": 0.3588971734046936,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005158033780753612,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005158033780753612,
"signal/frontier_coverage_15/centered_abs_mean": 0.28815833330154417,
"signal/frontier_coverage_15/group_std_mean": 0.3588971734046936,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005158033780753612,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005158033780753612,
"signal/frontier_coverage_20/centered_abs_mean": 0.28815833330154417,
"signal/frontier_coverage_20/group_std_mean": 0.3588971734046936,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005158033780753612,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005158033780753612,
"signal/frontier_coverage_25/centered_abs_mean": 0.28815833330154417,
"signal/frontier_coverage_25/group_std_mean": 0.3588971734046936,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005158033780753612,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005158033780753612,
"signal/frontier_coverage_5/centered_abs_mean": 0.28815833330154417,
"signal/frontier_coverage_5/group_std_mean": 0.3588971734046936,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005158033780753612,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005158033780753612,
"signal/frontier_ece_reward/centered_abs_mean": 0.032915469631552695,
"signal/frontier_ece_reward/group_std_mean": 0.04370521605014801,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004114433703944087,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004114433703944087,
"step": 35
},
{
"calibration/aurc": 0.27142265360692236,
"calibration/batch_distribution_entropy": 0.9276274667151082,
"calibration/buffer_distribution_entropy": 0.9453424455999473,
"calibration/confidence_entropy": 0.4904303911764786,
"calibration/coverage@0%": 0.013755724139428155,
"calibration/coverage@1%": 0.013755724139428155,
"calibration/coverage@10%": 0.11803744967342229,
"calibration/coverage@15%": 0.16277429177868546,
"calibration/coverage@20%": 0.331587336757707,
"calibration/coverage@25%": 0.44951244122495027,
"calibration/coverage@30%": 0.5766922779499473,
"calibration/coverage@5%": 0.04112414519205974,
"calibration/ece": 0.10869480101542654,
"calibration/mean_confidence": 0.6108967077387699,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.021961805555555557,
"completions/max_length": 3859.8,
"completions/max_terminated_length": 3859.8,
"completions/mean_length": 878.8442749023437,
"completions/mean_terminated_length": 898.7201538085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 276.4,
"epoch": 0.09599880001499982,
"grad_norm": 0.0006976813892833889,
"learning_rate": 4.761904761904762e-06,
"loss": -0.0213,
"num_tokens": 110889863.0,
"reward": 0.9803658723831177,
"reward_std": 0.19682038128376006,
"rewards/accuracy_reward": 0.5561631917953491,
"rewards/brier_reward": 0.749631917476654,
"rewards/confidence_uniqueness_reward": 0.9038300752639771,
"rewards/format_reward": 0.9776909708976745,
"rewards/frontier_aurc_reward": -0.003466126276180148,
"rewards/frontier_coverage_1": 0.04215884767472744,
"rewards/frontier_coverage_10": 0.04215884767472744,
"rewards/frontier_coverage_15": 0.04215884767472744,
"rewards/frontier_coverage_20": 0.04215884767472744,
"rewards/frontier_coverage_25": 0.04165246896445751,
"rewards/frontier_coverage_5": 0.04215884767472744,
"rewards/frontier_ece_reward": 0.018394294753670694,
"signal/accuracy_reward/centered_abs_mean": 0.2570475250482559,
"signal/accuracy_reward/group_std_mean": 0.3235936462879181,
"signal/accuracy_reward/group_zero_std_frac": 0.1472222238779068,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12852376252412795,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.12852376252412795,
"signal/advantage_abs_mean": 0.14671072959899903,
"signal/advantage_pre_scale_abs_mean": 0.14671072959899903,
"signal/advantage_pre_scale_std": 0.21450220942497253,
"signal/advantage_std": 0.21450220942497253,
"signal/brier_reward/centered_abs_mean": 0.19289257526397705,
"signal/brier_reward/group_std_mean": 0.24276741445064545,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02411157190799713,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02411157190799713,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07457488030195236,
"signal/confidence_uniqueness_reward/group_std_mean": 0.11184274405241013,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009321860037744045,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009321860037744045,
"signal/format_reward/centered_abs_mean": 0.03830837681889534,
"signal/format_reward/group_std_mean": 0.07467902153730392,
"signal/format_reward/group_zero_std_frac": 0.6833333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01915418840944767,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01915418840944767,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003243405232205987,
"signal/frontier_aurc_reward/group_std_mean": 0.004562646104022861,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.805695327580907e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.805695327580907e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1917881727218628,
"signal/frontier_coverage_1/group_std_mean": 0.2579316467046738,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003433008212596178,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003433008212596178,
"signal/frontier_coverage_10/centered_abs_mean": 0.1917881727218628,
"signal/frontier_coverage_10/group_std_mean": 0.2579316467046738,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003433008212596178,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003433008212596178,
"signal/frontier_coverage_15/centered_abs_mean": 0.1917881727218628,
"signal/frontier_coverage_15/group_std_mean": 0.2579316467046738,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003433008212596178,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003433008212596178,
"signal/frontier_coverage_20/centered_abs_mean": 0.1917881727218628,
"signal/frontier_coverage_20/group_std_mean": 0.2579316467046738,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003433008212596178,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003433008212596178,
"signal/frontier_coverage_25/centered_abs_mean": 0.19005673825740815,
"signal/frontier_coverage_25/group_std_mean": 0.25573796927928927,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003402015473693609,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003402015473693609,
"signal/frontier_coverage_5/centered_abs_mean": 0.1917881727218628,
"signal/frontier_coverage_5/group_std_mean": 0.2579316467046738,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003433008212596178,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003433008212596178,
"signal/frontier_ece_reward/centered_abs_mean": 0.03396204262971878,
"signal/frontier_ece_reward/group_std_mean": 0.043666718155145647,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004245255328714848,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004245255328714848,
"step": 40
},
{
"calibration/aurc": 0.21111682886197997,
"calibration/batch_distribution_entropy": 0.7720451181894042,
"calibration/buffer_distribution_entropy": 0.9473069889769613,
"calibration/confidence_entropy": 0.3829034585720266,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.14865921706595187,
"calibration/coverage@15%": 0.20355953537722676,
"calibration/coverage@20%": 0.4912034258569726,
"calibration/coverage@25%": 0.7245557363623337,
"calibration/coverage@30%": 0.8875162025482677,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.1270810302231555,
"calibration/mean_confidence": 0.7599788657483719,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02786458333333335,
"completions/max_length": 3859.8,
"completions/max_terminated_length": 3859.8,
"completions/mean_length": 915.6712768554687,
"completions/mean_terminated_length": 942.2672607421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 314.0,
"epoch": 0.1079986500168748,
"grad_norm": 0.000620553910266608,
"learning_rate": 4.909638554216868e-06,
"loss": -0.0259,
"num_tokens": 124573660.0,
"reward": 0.9803022265434265,
"reward_std": 0.20692598223686218,
"rewards/accuracy_reward": 0.5699652791023254,
"rewards/brier_reward": 0.7361868262290955,
"rewards/confidence_uniqueness_reward": 0.8916867733001709,
"rewards/format_reward": 0.9720486164093017,
"rewards/frontier_aurc_reward": -0.003825964545831084,
"rewards/frontier_coverage_1": 0.03256035540252924,
"rewards/frontier_coverage_10": 0.03256035540252924,
"rewards/frontier_coverage_15": 0.03256035540252924,
"rewards/frontier_coverage_20": 0.03256035540252924,
"rewards/frontier_coverage_25": 0.03246962446719408,
"rewards/frontier_coverage_5": 0.03256035540252924,
"rewards/frontier_ece_reward": 0.01907350979745388,
"signal/accuracy_reward/centered_abs_mean": 0.24231770634651184,
"signal/accuracy_reward/group_std_mean": 0.31229459047317504,
"signal/accuracy_reward/group_zero_std_frac": 0.14166666716337203,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12115885317325592,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.12115885317325592,
"signal/advantage_abs_mean": 0.15380933582782746,
"signal/advantage_pre_scale_abs_mean": 0.15380933582782746,
"signal/advantage_pre_scale_std": 0.2281140685081482,
"signal/advantage_std": 0.2281140685081482,
"signal/brier_reward/centered_abs_mean": 0.1986512392759323,
"signal/brier_reward/group_std_mean": 0.2531787097454071,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02483140490949154,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02483140490949154,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08986316919326783,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1289219468832016,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011232896149158478,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011232896149158478,
"signal/format_reward/centered_abs_mean": 0.04635416753590107,
"signal/format_reward/group_std_mean": 0.08056993260979653,
"signal/format_reward/group_zero_std_frac": 0.694444453716278,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.023177083767950534,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.023177083767950534,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003908289410173893,
"signal/frontier_aurc_reward/group_std_mean": 0.005522150546312332,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.995837611611933e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.995837611611933e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1319861814379692,
"signal/frontier_coverage_1/group_std_mean": 0.18985169529914855,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023625525878742336,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023625525878742336,
"signal/frontier_coverage_10/centered_abs_mean": 0.1319861814379692,
"signal/frontier_coverage_10/group_std_mean": 0.18985169529914855,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023625525878742336,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023625525878742336,
"signal/frontier_coverage_15/centered_abs_mean": 0.1319861814379692,
"signal/frontier_coverage_15/group_std_mean": 0.18985169529914855,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023625525878742336,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023625525878742336,
"signal/frontier_coverage_20/centered_abs_mean": 0.1319861814379692,
"signal/frontier_coverage_20/group_std_mean": 0.18985169529914855,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023625525878742336,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023625525878742336,
"signal/frontier_coverage_25/centered_abs_mean": 0.12659270018339158,
"signal/frontier_coverage_25/group_std_mean": 0.1825567066669464,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022660091053694487,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022660091053694487,
"signal/frontier_coverage_5/centered_abs_mean": 0.1319861814379692,
"signal/frontier_coverage_5/group_std_mean": 0.18985169529914855,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023625525878742336,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023625525878742336,
"signal/frontier_ece_reward/centered_abs_mean": 0.033340536430478095,
"signal/frontier_ece_reward/group_std_mean": 0.04229341298341751,
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004167567053809762,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004167567053809762,
"step": 45
},
{
"calibration/aurc": 0.3430718095214229,
"calibration/batch_distribution_entropy": 0.8041025516564181,
"calibration/buffer_distribution_entropy": 0.9401868214478146,
"calibration/confidence_entropy": 0.38931542096917787,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.014323607427055704,
"calibration/coverage@15%": 0.040318302387267906,
"calibration/coverage@20%": 0.15724626742288644,
"calibration/coverage@25%": 0.25119180206947106,
"calibration/coverage@30%": 0.386912761150543,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.22271492216092695,
"calibration/mean_confidence": 0.729308284858438,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.024739583333333325,
"completions/max_length": 3747.8,
"completions/max_terminated_length": 3747.8,
"completions/mean_length": 950.247998046875,
"completions/mean_terminated_length": 974.4024658203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 289.4,
"epoch": 0.11999850001874976,
"grad_norm": 0.0004917439073324203,
"learning_rate": 4.759036144578314e-06,
"loss": -0.0236,
"num_tokens": 138618117.0,
"reward": 0.9939500212669372,
"reward_std": 0.1924179255962372,
"rewards/accuracy_reward": 0.5958333373069763,
"rewards/brier_reward": 0.7404291272163391,
"rewards/confidence_uniqueness_reward": 0.8836002588272095,
"rewards/format_reward": 0.9748263835906983,
"rewards/frontier_aurc_reward": -0.0037740686908364295,
"rewards/frontier_coverage_1": 0.0278642563149333,
"rewards/frontier_coverage_10": 0.0278642563149333,
"rewards/frontier_coverage_15": 0.0278642563149333,
"rewards/frontier_coverage_20": 0.0278642563149333,
"rewards/frontier_coverage_25": 0.025865022838115693,
"rewards/frontier_coverage_5": 0.0278642563149333,
"rewards/frontier_ece_reward": 0.02181735634803772,
"signal/accuracy_reward/centered_abs_mean": 0.22273220419883727,
"signal/accuracy_reward/group_std_mean": 0.28220229744911196,
"signal/accuracy_reward/group_zero_std_frac": 0.2388888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11136610209941863,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11136610209941863,
"signal/advantage_abs_mean": 0.14409471452236175,
"signal/advantage_pre_scale_abs_mean": 0.14409471452236175,
"signal/advantage_pre_scale_std": 0.21969010531902314,
"signal/advantage_std": 0.21969010531902314,
"signal/brier_reward/centered_abs_mean": 0.19170068800449372,
"signal/brier_reward/group_std_mean": 0.24460790455341339,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023962586000561715,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.023962586000561715,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09260407537221908,
"signal/confidence_uniqueness_reward/group_std_mean": 0.12991546094417572,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011575509421527385,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011575509421527385,
"signal/format_reward/centered_abs_mean": 0.04220920167863369,
"signal/format_reward/group_std_mean": 0.07533831149339676,
"signal/format_reward/group_zero_std_frac": 0.7055555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.021104600839316846,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.021104600839316846,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003983176313340664,
"signal/frontier_aurc_reward/group_std_mean": 0.00584728941321373,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.129885198082775e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.129885198082775e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12392588853836059,
"signal/frontier_coverage_1/group_std_mean": 0.18006423413753508,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022182733286172152,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022182733286172152,
"signal/frontier_coverage_10/centered_abs_mean": 0.12392588853836059,
"signal/frontier_coverage_10/group_std_mean": 0.18006423413753508,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022182733286172152,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022182733286172152,
"signal/frontier_coverage_15/centered_abs_mean": 0.12392588853836059,
"signal/frontier_coverage_15/group_std_mean": 0.18006423413753508,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022182733286172152,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022182733286172152,
"signal/frontier_coverage_20/centered_abs_mean": 0.12392588853836059,
"signal/frontier_coverage_20/group_std_mean": 0.18006423413753508,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022182733286172152,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022182733286172152,
"signal/frontier_coverage_25/centered_abs_mean": 0.11814317256212234,
"signal/frontier_coverage_25/group_std_mean": 0.17232573330402373,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002114762645214796,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002114762645214796,
"signal/frontier_coverage_5/centered_abs_mean": 0.12392588853836059,
"signal/frontier_coverage_5/group_std_mean": 0.18006423413753508,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022182733286172152,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022182733286172152,
"signal/frontier_ece_reward/centered_abs_mean": 0.029776628687977792,
"signal/frontier_ece_reward/group_std_mean": 0.037955837696790694,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00555555559694767,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003722078585997224,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003722078585997224,
"step": 50
},
{
"epoch": 0.11999850001874976,
"eval_calibration/aurc": 0.1801128277259354,
"eval_calibration/batch_distribution_entropy": 0.7401758100779751,
"eval_calibration/buffer_distribution_entropy": 0.9360676626033105,
"eval_calibration/confidence_entropy": 0.3804461416023805,
"eval_calibration/coverage@0%": 0.1562021072796935,
"eval_calibration/coverage@1%": 0.1562021072796935,
"eval_calibration/coverage@10%": 0.4049449233716475,
"eval_calibration/coverage@15%": 0.5112667624521073,
"eval_calibration/coverage@20%": 0.6722222222222222,
"eval_calibration/coverage@25%": 0.7611111111111111,
"eval_calibration/coverage@30%": 0.8875000000000001,
"eval_calibration/coverage@5%": 0.1562021072796935,
"eval_calibration/ece": 0.190010410979762,
"eval_calibration/mean_confidence": 0.738599688144678,
"eval_completions/clipped_ratio": 0.020833333333333332,
"eval_completions/max_length": 2587.3333333333335,
"eval_completions/max_terminated_length": 2587.3333333333335,
"eval_completions/mean_length": 941.2585144042969,
"eval_completions/mean_terminated_length": 961.3792317708334,
"eval_completions/min_length": 73.33333333333333,
"eval_completions/min_terminated_length": 390.1666666666667,
"eval_loss": 0.0,
"eval_num_tokens": 138618117.0,
"eval_reward": 1.0041676660378773,
"eval_reward_std": 0.29189151525497437,
"eval_rewards/accuracy_reward": 0.621527781089147,
"eval_rewards/brier_reward": 0.7572712401549021,
"eval_rewards/confidence_uniqueness_reward": 0.8511187533537546,
"eval_rewards/format_reward": 0.9748263955116272,
"eval_rewards/frontier_aurc_reward": -0.0029798958372945585,
"eval_rewards/frontier_coverage_1": 0.021538497608465452,
"eval_rewards/frontier_coverage_10": 0.021538497608465452,
"eval_rewards/frontier_coverage_15": 0.021538497608465452,
"eval_rewards/frontier_coverage_20": 0.021538497608465452,
"eval_rewards/frontier_coverage_25": 0.021917358913924545,
"eval_rewards/frontier_coverage_5": 0.021538497608465452,
"eval_rewards/frontier_ece_reward": 0.021401030011475086,
"eval_runtime": 215.5104,
"eval_samples_per_second": 4.64,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4531249950329463,
"eval_signal/accuracy_reward/group_std_mean": 0.48286275565624237,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22656249751647314,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22656249751647314,
"eval_signal/advantage_abs_mean": 0.25273098051548004,
"eval_signal/advantage_pre_scale_abs_mean": 0.25273098051548004,
"eval_signal/advantage_pre_scale_std": 0.2910451292991638,
"eval_signal/advantage_std": 0.2910451292991638,
"eval_signal/brier_reward/centered_abs_mean": 0.24945268283287683,
"eval_signal/brier_reward/group_std_mean": 0.3116542746623357,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031181585354109604,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.031181585354109604,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.09014085307717323,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.13949279735485712,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011267606634646654,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011267606634646654,
"eval_signal/format_reward/centered_abs_mean": 0.04736328109477957,
"eval_signal/format_reward/group_std_mean": 0.11093035619705915,
"eval_signal/format_reward/group_zero_std_frac": 0.4722222362955411,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.023681640547389787,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.023681640547389787,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004799036852394541,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.007366780269270142,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.590275562407139e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.590275562407139e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.161320169766744,
"eval_signal/frontier_coverage_1/group_std_mean": 0.25421082725127536,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028876310292010507,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028876310292010507,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.161320169766744,
"eval_signal/frontier_coverage_10/group_std_mean": 0.25421082725127536,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028876310292010507,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028876310292010507,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.161320169766744,
"eval_signal/frontier_coverage_15/group_std_mean": 0.25421082725127536,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028876310292010507,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028876310292010507,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.161320169766744,
"eval_signal/frontier_coverage_20/group_std_mean": 0.25421082725127536,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028876310292010507,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028876310292010507,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.15456343442201614,
"eval_signal/frontier_coverage_25/group_std_mean": 0.24467646330595016,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027666852499047914,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027666852499047914,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.161320169766744,
"eval_signal/frontier_coverage_5/group_std_mean": 0.25421082725127536,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028876310292010507,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028876310292010507,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.04011191427707672,
"eval_signal/frontier_ece_reward/group_std_mean": 0.047835673515995346,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00501398928463459,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00501398928463459,
"eval_steps_per_second": 0.028,
"step": 50
},
{
"calibration/aurc": 0.23110574984938426,
"calibration/batch_distribution_entropy": 0.8089199000138481,
"calibration/buffer_distribution_entropy": 0.933102581412159,
"calibration/confidence_entropy": 0.38733970684542757,
"calibration/coverage@0%": 0.005305039787798409,
"calibration/coverage@1%": 0.005305039787798409,
"calibration/coverage@10%": 0.138139306638724,
"calibration/coverage@15%": 0.298316360622093,
"calibration/coverage@20%": 0.45166650464538166,
"calibration/coverage@25%": 0.5443626720704854,
"calibration/coverage@30%": 0.7179725549565843,
"calibration/coverage@5%": 0.019666741915457985,
"calibration/ece": 0.1603883890114183,
"calibration/mean_confidence": 0.7279595248111649,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02378472222222221,
"completions/max_length": 3765.0,
"completions/max_terminated_length": 3765.0,
"completions/mean_length": 989.8234497070313,
"completions/mean_terminated_length": 1014.1861572265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 282.0,
"epoch": 0.13199835002062474,
"grad_norm": 0.0004283857124391943,
"learning_rate": 4.60843373493976e-06,
"loss": -0.0216,
"num_tokens": 153101459.0,
"reward": 1.0128529906272887,
"reward_std": 0.17540223300457,
"rewards/accuracy_reward": 0.6284722328186035,
"rewards/brier_reward": 0.7503431439399719,
"rewards/confidence_uniqueness_reward": 0.9019472122192382,
"rewards/format_reward": 0.9759548664093017,
"rewards/frontier_aurc_reward": -0.0030153077095746994,
"rewards/frontier_coverage_1": 0.015781766315922142,
"rewards/frontier_coverage_10": 0.015781766315922142,
"rewards/frontier_coverage_15": 0.015781766315922142,
"rewards/frontier_coverage_20": 0.015781766315922142,
"rewards/frontier_coverage_25": 0.015925674338359386,
"rewards/frontier_coverage_5": 0.015781766315922142,
"rewards/frontier_ece_reward": 0.01967682149261236,
"signal/accuracy_reward/centered_abs_mean": 0.1988498240709305,
"signal/accuracy_reward/group_std_mean": 0.2617811858654022,
"signal/accuracy_reward/group_zero_std_frac": 0.25833333730697633,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09942491203546525,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09942491203546525,
"signal/advantage_abs_mean": 0.1299208104610443,
"signal/advantage_pre_scale_abs_mean": 0.1299208104610443,
"signal/advantage_pre_scale_std": 0.20433064699172973,
"signal/advantage_std": 0.20433064699172973,
"signal/brier_reward/centered_abs_mean": 0.18966372907161713,
"signal/brier_reward/group_std_mean": 0.23997350335121154,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02370796613395214,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02370796613395214,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07613965570926666,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1076380655169487,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009517456963658332,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009517456963658332,
"signal/format_reward/centered_abs_mean": 0.0372667096555233,
"signal/format_reward/group_std_mean": 0.0631372444331646,
"signal/format_reward/group_zero_std_frac": 0.7583333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01863335482776165,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01863335482776165,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030304885003715754,
"signal/frontier_aurc_reward/group_std_mean": 0.004345366265624762,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.424574264907278e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.424574264907278e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14639625549316407,
"signal/frontier_coverage_1/group_std_mean": 0.2078000247478485,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002620492875576019,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002620492875576019,
"signal/frontier_coverage_10/centered_abs_mean": 0.14639625549316407,
"signal/frontier_coverage_10/group_std_mean": 0.2078000247478485,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002620492875576019,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002620492875576019,
"signal/frontier_coverage_15/centered_abs_mean": 0.14639625549316407,
"signal/frontier_coverage_15/group_std_mean": 0.2078000247478485,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002620492875576019,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002620492875576019,
"signal/frontier_coverage_20/centered_abs_mean": 0.14639625549316407,
"signal/frontier_coverage_20/group_std_mean": 0.2078000247478485,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002620492875576019,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002620492875576019,
"signal/frontier_coverage_25/centered_abs_mean": 0.13936484456062317,
"signal/frontier_coverage_25/group_std_mean": 0.19845299422740936,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002494630683213472,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002494630683213472,
"signal/frontier_coverage_5/centered_abs_mean": 0.14639625549316407,
"signal/frontier_coverage_5/group_std_mean": 0.2078000247478485,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002620492875576019,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002620492875576019,
"signal/frontier_ece_reward/centered_abs_mean": 0.028044429421424866,
"signal/frontier_ece_reward/group_std_mean": 0.03536311313509941,
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035055536776781083,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035055536776781083,
"step": 55
},
{
"calibration/aurc": 0.27031141258981295,
"calibration/batch_distribution_entropy": 0.925694518685917,
"calibration/buffer_distribution_entropy": 0.930078633270959,
"calibration/confidence_entropy": 0.4497176248945136,
"calibration/coverage@0%": 0.021467342760165452,
"calibration/coverage@1%": 0.021467342760165452,
"calibration/coverage@10%": 0.2214269049112599,
"calibration/coverage@15%": 0.3252333291365323,
"calibration/coverage@20%": 0.4505228062708378,
"calibration/coverage@25%": 0.5076995105341562,
"calibration/coverage@30%": 0.6556430446194226,
"calibration/coverage@5%": 0.07958252600623875,
"calibration/ece": 0.16927449984691845,
"calibration/mean_confidence": 0.6089067489743154,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.023697916666666673,
"completions/max_length": 3999.4,
"completions/max_terminated_length": 3999.4,
"completions/mean_length": 1033.7720581054687,
"completions/mean_terminated_length": 1058.8163940429688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 306.0,
"epoch": 0.14399820002249972,
"grad_norm": 0.00038596263038925827,
"learning_rate": 4.457831325301205e-06,
"loss": -0.0209,
"num_tokens": 168107089.0,
"reward": 1.0082432866096496,
"reward_std": 0.17611123621463776,
"rewards/accuracy_reward": 0.6102430701255799,
"rewards/brier_reward": 0.7585122108459472,
"rewards/confidence_uniqueness_reward": 0.921110475063324,
"rewards/format_reward": 0.9755208373069764,
"rewards/frontier_aurc_reward": -0.00250103990547359,
"rewards/frontier_coverage_1": 0.030346688139252363,
"rewards/frontier_coverage_10": 0.030346688139252363,
"rewards/frontier_coverage_15": 0.030346688139252363,
"rewards/frontier_coverage_20": 0.030346688139252363,
"rewards/frontier_coverage_25": 0.03001237902790308,
"rewards/frontier_coverage_5": 0.030346688139252363,
"rewards/frontier_ece_reward": 0.017600218765437603,
"signal/accuracy_reward/centered_abs_mean": 0.2137261301279068,
"signal/accuracy_reward/group_std_mean": 0.26997236013412473,
"signal/accuracy_reward/group_zero_std_frac": 0.28611111342906953,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1068630650639534,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1068630650639534,
"signal/advantage_abs_mean": 0.13051027357578276,
"signal/advantage_pre_scale_abs_mean": 0.13051027357578276,
"signal/advantage_pre_scale_std": 0.20581234395503997,
"signal/advantage_std": 0.20581234395503997,
"signal/brier_reward/centered_abs_mean": 0.1883644551038742,
"signal/brier_reward/group_std_mean": 0.23838137984275817,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023545556887984275,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.023545556887984275,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06121076717972755,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09759739488363266,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007651345897465944,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007651345897465944,
"signal/format_reward/centered_abs_mean": 0.03964843787252903,
"signal/format_reward/group_std_mean": 0.07346592992544174,
"signal/format_reward/group_zero_std_frac": 0.7027777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.019824218936264515,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.019824218936264515,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002220911718904972,
"signal/frontier_aurc_reward/group_std_mean": 0.003224900644272566,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9754316821927206e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9754316821927206e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18549104630947114,
"signal/frontier_coverage_1/group_std_mean": 0.25374809205532073,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003320289496332407,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003320289496332407,
"signal/frontier_coverage_10/centered_abs_mean": 0.18549104630947114,
"signal/frontier_coverage_10/group_std_mean": 0.25374809205532073,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003320289496332407,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003320289496332407,
"signal/frontier_coverage_15/centered_abs_mean": 0.18549104630947114,
"signal/frontier_coverage_15/group_std_mean": 0.25374809205532073,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003320289496332407,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003320289496332407,
"signal/frontier_coverage_20/centered_abs_mean": 0.18549104630947114,
"signal/frontier_coverage_20/group_std_mean": 0.25374809205532073,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003320289496332407,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003320289496332407,
"signal/frontier_coverage_25/centered_abs_mean": 0.17355382144451142,
"signal/frontier_coverage_25/group_std_mean": 0.2386282503604889,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003106613457202911,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003106613457202911,
"signal/frontier_coverage_5/centered_abs_mean": 0.18549104630947114,
"signal/frontier_coverage_5/group_std_mean": 0.25374809205532073,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003320289496332407,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003320289496332407,
"signal/frontier_ece_reward/centered_abs_mean": 0.02598983086645603,
"signal/frontier_ece_reward/group_std_mean": 0.03347852304577827,
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003248728858307004,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003248728858307004,
"step": 60
},
{
"calibration/aurc": 0.24556579726074385,
"calibration/batch_distribution_entropy": 0.9026302785834652,
"calibration/buffer_distribution_entropy": 0.9318042377016408,
"calibration/confidence_entropy": 0.4486623650823532,
"calibration/coverage@0%": 0.00793576986065844,
"calibration/coverage@1%": 0.00793576986065844,
"calibration/coverage@10%": 0.1194504595567045,
"calibration/coverage@15%": 0.3187219480359977,
"calibration/coverage@20%": 0.5244310933832791,
"calibration/coverage@25%": 0.6673086716262446,
"calibration/coverage@30%": 0.736127819466827,
"calibration/coverage@5%": 0.018434457524700434,
"calibration/ece": 0.1280835390664104,
"calibration/mean_confidence": 0.6550812971775258,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.021006944444444443,
"completions/max_length": 3758.4,
"completions/max_terminated_length": 3758.4,
"completions/mean_length": 1050.6894287109376,
"completions/mean_terminated_length": 1073.2746337890626,
"completions/min_length": 0.0,
"completions/min_terminated_length": 247.6,
"epoch": 0.1559980500243747,
"grad_norm": 0.0004071203584317118,
"learning_rate": 4.307228915662651e-06,
"loss": -0.0193,
"num_tokens": 183305079.0,
"reward": 1.0295760035514832,
"reward_std": 0.1585527241230011,
"rewards/accuracy_reward": 0.6458333253860473,
"rewards/brier_reward": 0.7787026405334473,
"rewards/confidence_uniqueness_reward": 0.922456705570221,
"rewards/format_reward": 0.9786458373069763,
"rewards/frontier_aurc_reward": -0.0021291735116392373,
"rewards/frontier_coverage_1": 0.022982970625162125,
"rewards/frontier_coverage_10": 0.022982970625162125,
"rewards/frontier_coverage_15": 0.022982970625162125,
"rewards/frontier_coverage_20": 0.022982970625162125,
"rewards/frontier_coverage_25": 0.022807615250349043,
"rewards/frontier_coverage_5": 0.022982970625162125,
"rewards/frontier_ece_reward": 0.018114662915468215,
"signal/accuracy_reward/centered_abs_mean": 0.1768988698720932,
"signal/accuracy_reward/group_std_mean": 0.2400292694568634,
"signal/accuracy_reward/group_zero_std_frac": 0.30277777910232545,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0884494349360466,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0884494349360466,
"signal/advantage_abs_mean": 0.1123097226023674,
"signal/advantage_pre_scale_abs_mean": 0.1123097226023674,
"signal/advantage_pre_scale_std": 0.18798567056655885,
"signal/advantage_std": 0.18798567056655885,
"signal/brier_reward/centered_abs_mean": 0.1735439658164978,
"signal/brier_reward/group_std_mean": 0.22210538983345032,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021692995727062226,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021692995727062226,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.058646786212921145,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09276971966028214,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007330848276615143,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007330848276615143,
"signal/format_reward/centered_abs_mean": 0.03575303815305233,
"signal/format_reward/group_std_mean": 0.06657437458634377,
"signal/format_reward/group_zero_std_frac": 0.7277777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.017876519076526164,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.017876519076526164,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018694063648581505,
"signal/frontier_aurc_reward/group_std_mean": 0.0027225222904235123,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.346237317600753e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.346237317600753e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16750572323799134,
"signal/frontier_coverage_1/group_std_mean": 0.232903328537941,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002998352330178022,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002998352330178022,
"signal/frontier_coverage_10/centered_abs_mean": 0.16750572323799134,
"signal/frontier_coverage_10/group_std_mean": 0.232903328537941,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002998352330178022,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002998352330178022,
"signal/frontier_coverage_15/centered_abs_mean": 0.16750572323799134,
"signal/frontier_coverage_15/group_std_mean": 0.232903328537941,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002998352330178022,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002998352330178022,
"signal/frontier_coverage_20/centered_abs_mean": 0.16750572323799134,
"signal/frontier_coverage_20/group_std_mean": 0.232903328537941,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002998352330178022,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002998352330178022,
"signal/frontier_coverage_25/centered_abs_mean": 0.1528725266456604,
"signal/frontier_coverage_25/group_std_mean": 0.21361254751682282,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00273641818203032,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00273641818203032,
"signal/frontier_coverage_5/centered_abs_mean": 0.16750572323799134,
"signal/frontier_coverage_5/group_std_mean": 0.232903328537941,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002998352330178022,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002998352330178022,
"signal/frontier_ece_reward/centered_abs_mean": 0.02373338267207146,
"signal/frontier_ece_reward/group_std_mean": 0.03065846674144268,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0029666728340089323,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0029666728340089323,
"step": 65
},
{
"calibration/aurc": 0.25933503364998944,
"calibration/batch_distribution_entropy": 0.8800218583456665,
"calibration/buffer_distribution_entropy": 0.9317119694851437,
"calibration/confidence_entropy": 0.4205811264235626,
"calibration/coverage@0%": 0.003237462060991473,
"calibration/coverage@1%": 0.003237462060991473,
"calibration/coverage@10%": 0.14655297008238183,
"calibration/coverage@15%": 0.216372308137014,
"calibration/coverage@20%": 0.38418761415631975,
"calibration/coverage@25%": 0.5244719724579049,
"calibration/coverage@30%": 0.6908412398030606,
"calibration/coverage@5%": 0.037462060991472756,
"calibration/ece": 0.1322729398015512,
"calibration/mean_confidence": 0.6758229255336713,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02743055555555556,
"completions/max_length": 3929.0,
"completions/max_terminated_length": 3929.0,
"completions/mean_length": 1083.6014892578125,
"completions/mean_terminated_length": 1114.269970703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 245.0,
"epoch": 0.16799790002624967,
"grad_norm": 0.0003614244342315942,
"learning_rate": 4.156626506024097e-06,
"loss": -0.0247,
"num_tokens": 198866312.0,
"reward": 1.0165080308914185,
"reward_std": 0.16381416022777556,
"rewards/accuracy_reward": 0.6317708253860473,
"rewards/brier_reward": 0.7659277558326721,
"rewards/confidence_uniqueness_reward": 0.9152140974998474,
"rewards/format_reward": 0.971875011920929,
"rewards/frontier_aurc_reward": -0.0022951006889343263,
"rewards/frontier_coverage_1": 0.023939225263893603,
"rewards/frontier_coverage_10": 0.023939225263893603,
"rewards/frontier_coverage_15": 0.023939225263893603,
"rewards/frontier_coverage_20": 0.023939225263893603,
"rewards/frontier_coverage_25": 0.023304045526310803,
"rewards/frontier_coverage_5": 0.023939225263893603,
"rewards/frontier_ece_reward": 0.016189970448613165,
"signal/accuracy_reward/centered_abs_mean": 0.17754991054534913,
"signal/accuracy_reward/group_std_mean": 0.23475689589977264,
"signal/accuracy_reward/group_zero_std_frac": 0.3277777820825577,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08877495527267457,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08877495527267457,
"signal/advantage_abs_mean": 0.11769283264875412,
"signal/advantage_pre_scale_abs_mean": 0.11769283264875412,
"signal/advantage_pre_scale_std": 0.1989002138376236,
"signal/advantage_std": 0.1989002138376236,
"signal/brier_reward/centered_abs_mean": 0.17325561940670015,
"signal/brier_reward/group_std_mean": 0.22210197150707245,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021656952425837518,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021656952425837518,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06669195368885994,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1016717791557312,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008336494211107492,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008336494211107492,
"signal/format_reward/centered_abs_mean": 0.04429253414273262,
"signal/format_reward/group_std_mean": 0.074928018450737,
"signal/format_reward/group_zero_std_frac": 0.7222222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.02214626707136631,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.02214626707136631,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019255728693678975,
"signal/frontier_aurc_reward/group_std_mean": 0.0028066968079656363,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4467753357603216e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4467753357603216e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15990498065948486,
"signal/frontier_coverage_1/group_std_mean": 0.22118420898914337,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028622991405427454,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028622991405427454,
"signal/frontier_coverage_10/centered_abs_mean": 0.15990498065948486,
"signal/frontier_coverage_10/group_std_mean": 0.22118420898914337,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028622991405427454,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028622991405427454,
"signal/frontier_coverage_15/centered_abs_mean": 0.15990498065948486,
"signal/frontier_coverage_15/group_std_mean": 0.22118420898914337,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028622991405427454,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028622991405427454,
"signal/frontier_coverage_20/centered_abs_mean": 0.15990498065948486,
"signal/frontier_coverage_20/group_std_mean": 0.22118420898914337,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028622991405427454,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028622991405427454,
"signal/frontier_coverage_25/centered_abs_mean": 0.13494354784488677,
"signal/frontier_coverage_25/group_std_mean": 0.18864382803440094,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002415489498525858,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002415489498525858,
"signal/frontier_coverage_5/centered_abs_mean": 0.15990498065948486,
"signal/frontier_coverage_5/group_std_mean": 0.22118420898914337,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028622991405427454,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028622991405427454,
"signal/frontier_ece_reward/centered_abs_mean": 0.022081541270017623,
"signal/frontier_ece_reward/group_std_mean": 0.02841006629168987,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002760192658752203,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002760192658752203,
"step": 70
},
{
"calibration/aurc": 0.21584225769988402,
"calibration/batch_distribution_entropy": 0.8232698717769189,
"calibration/buffer_distribution_entropy": 0.9289513937019261,
"calibration/confidence_entropy": 0.40924202055118303,
"calibration/coverage@0%": 0.02377966824023079,
"calibration/coverage@1%": 0.02377966824023079,
"calibration/coverage@10%": 0.27830202596380804,
"calibration/coverage@15%": 0.3686221479150275,
"calibration/coverage@20%": 0.4240375579159018,
"calibration/coverage@25%": 0.5429435483870968,
"calibration/coverage@30%": 0.7251450198075834,
"calibration/coverage@5%": 0.146294065259201,
"calibration/ece": 0.133336138135841,
"calibration/mean_confidence": 0.7307139257891493,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01875,
"completions/max_length": 3962.8,
"completions/max_terminated_length": 3962.8,
"completions/mean_length": 1109.963720703125,
"completions/mean_terminated_length": 1131.3071044921876,
"completions/min_length": 0.0,
"completions/min_terminated_length": 300.6,
"epoch": 0.17999775002812465,
"grad_norm": 0.000340988248353824,
"learning_rate": 4.006024096385543e-06,
"loss": -0.0156,
"num_tokens": 214717990.0,
"reward": 1.048438024520874,
"reward_std": 0.1558634340763092,
"rewards/accuracy_reward": 0.6855034828186035,
"rewards/brier_reward": 0.784266984462738,
"rewards/confidence_uniqueness_reward": 0.9156635403633118,
"rewards/format_reward": 0.9809895753860474,
"rewards/frontier_aurc_reward": -0.002111028810031712,
"rewards/frontier_coverage_1": 0.005601268447935581,
"rewards/frontier_coverage_10": 0.005601268447935581,
"rewards/frontier_coverage_15": 0.005601268447935581,
"rewards/frontier_coverage_20": 0.005601268447935581,
"rewards/frontier_coverage_25": 0.010380196291953326,
"rewards/frontier_coverage_5": 0.005601268447935581,
"rewards/frontier_ece_reward": 0.01640697121620178,
"signal/accuracy_reward/centered_abs_mean": 0.18347981870174407,
"signal/accuracy_reward/group_std_mean": 0.2395369827747345,
"signal/accuracy_reward/group_zero_std_frac": 0.3277777791023254,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09173990935087203,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09173990935087203,
"signal/advantage_abs_mean": 0.11403761059045792,
"signal/advantage_pre_scale_abs_mean": 0.11403761059045792,
"signal/advantage_pre_scale_std": 0.19213829636573793,
"signal/advantage_std": 0.19213829636573793,
"signal/brier_reward/centered_abs_mean": 0.1615518569946289,
"signal/brier_reward/group_std_mean": 0.2098041832447052,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020193982124328613,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020193982124328613,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06113546639680863,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09073543101549149,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007641933299601078,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007641933299601078,
"signal/format_reward/centered_abs_mean": 0.03050672747194767,
"signal/format_reward/group_std_mean": 0.054883723706007005,
"signal/format_reward/group_zero_std_frac": 0.7805555582046508,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.015253363735973835,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.015253363735973835,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018784363754093647,
"signal/frontier_aurc_reward/group_std_mean": 0.002798874117434025,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.362400966580026e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.362400966580026e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13647643923759462,
"signal/frontier_coverage_1/group_std_mean": 0.19730258584022523,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002442928357049823,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002442928357049823,
"signal/frontier_coverage_10/centered_abs_mean": 0.13647643923759462,
"signal/frontier_coverage_10/group_std_mean": 0.19730258584022523,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002442928357049823,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002442928357049823,
"signal/frontier_coverage_15/centered_abs_mean": 0.13647643923759462,
"signal/frontier_coverage_15/group_std_mean": 0.19730258584022523,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002442928357049823,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002442928357049823,
"signal/frontier_coverage_20/centered_abs_mean": 0.13647643923759462,
"signal/frontier_coverage_20/group_std_mean": 0.19730258584022523,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002442928357049823,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002442928357049823,
"signal/frontier_coverage_25/centered_abs_mean": 0.10959683507680892,
"signal/frontier_coverage_25/group_std_mean": 0.16029545962810515,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019617833429947497,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019617833429947497,
"signal/frontier_coverage_5/centered_abs_mean": 0.13647643923759462,
"signal/frontier_coverage_5/group_std_mean": 0.19730258584022523,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002442928357049823,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002442928357049823,
"signal/frontier_ece_reward/centered_abs_mean": 0.019810602813959122,
"signal/frontier_ece_reward/group_std_mean": 0.025616522505879404,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0024763253517448903,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0024763253517448903,
"step": 75
},
{
"calibration/aurc": 0.21687736600561439,
"calibration/batch_distribution_entropy": 0.7663935195687704,
"calibration/buffer_distribution_entropy": 0.9254838064808076,
"calibration/confidence_entropy": 0.4093730514531302,
"calibration/coverage@0%": 0.014151451943476879,
"calibration/coverage@1%": 0.014151451943476879,
"calibration/coverage@10%": 0.019910614247141797,
"calibration/coverage@15%": 0.35223903051059835,
"calibration/coverage@20%": 0.547384604330937,
"calibration/coverage@25%": 0.7177102314561801,
"calibration/coverage@30%": 0.8540723393182409,
"calibration/coverage@5%": 0.019910614247141797,
"calibration/ece": 0.154557627381037,
"calibration/mean_confidence": 0.7600029543594301,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.018663194444444465,
"completions/max_length": 3866.6,
"completions/max_terminated_length": 3866.6,
"completions/mean_length": 1140.516748046875,
"completions/mean_terminated_length": 1162.3487548828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 255.4,
"epoch": 0.19199760002999963,
"grad_norm": 0.0004383666382636875,
"learning_rate": 3.855421686746989e-06,
"loss": -0.0172,
"num_tokens": 230910023.0,
"reward": 1.0291898369789123,
"reward_std": 0.15721507370471954,
"rewards/accuracy_reward": 0.6474826335906982,
"rewards/brier_reward": 0.7720065116882324,
"rewards/confidence_uniqueness_reward": 0.9144485116004943,
"rewards/format_reward": 0.9813368082046509,
"rewards/frontier_aurc_reward": -0.0023890127893537285,
"rewards/frontier_coverage_1": 0.021373348124325276,
"rewards/frontier_coverage_10": 0.021373348124325276,
"rewards/frontier_coverage_15": 0.021373348124325276,
"rewards/frontier_coverage_20": 0.021373348124325276,
"rewards/frontier_coverage_25": 0.02539810836315155,
"rewards/frontier_coverage_5": 0.021373348124325276,
"rewards/frontier_ece_reward": 0.013187539111822844,
"signal/accuracy_reward/centered_abs_mean": 0.18199326992034912,
"signal/accuracy_reward/group_std_mean": 0.24213020503520966,
"signal/accuracy_reward/group_zero_std_frac": 0.30833333134651186,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09099663496017456,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09099663496017456,
"signal/advantage_abs_mean": 0.11401565670967102,
"signal/advantage_pre_scale_abs_mean": 0.11401565670967102,
"signal/advantage_pre_scale_std": 0.18852558135986328,
"signal/advantage_std": 0.18852558135986328,
"signal/brier_reward/centered_abs_mean": 0.16122573614120483,
"signal/brier_reward/group_std_mean": 0.20942769348621368,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020153217017650604,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020153217017650604,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.061580770462751386,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09112796634435653,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007697596307843923,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007697596307843923,
"signal/format_reward/centered_abs_mean": 0.02798936665058136,
"signal/format_reward/group_std_mean": 0.05254996344447136,
"signal/format_reward/group_zero_std_frac": 0.7805555820465088,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01399468332529068,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01399468332529068,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019946877844631674,
"signal/frontier_aurc_reward/group_std_mean": 0.0029552684631198646,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5704911351786e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5704911351786e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12816164940595626,
"signal/frontier_coverage_1/group_std_mean": 0.18385762274265288,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002294093510136008,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002294093510136008,
"signal/frontier_coverage_10/centered_abs_mean": 0.12816164940595626,
"signal/frontier_coverage_10/group_std_mean": 0.18385762274265288,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002294093510136008,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002294093510136008,
"signal/frontier_coverage_15/centered_abs_mean": 0.12816164940595626,
"signal/frontier_coverage_15/group_std_mean": 0.18385762274265288,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002294093510136008,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002294093510136008,
"signal/frontier_coverage_20/centered_abs_mean": 0.12816164940595626,
"signal/frontier_coverage_20/group_std_mean": 0.18385762274265288,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002294093510136008,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002294093510136008,
"signal/frontier_coverage_25/centered_abs_mean": 0.09580764919519424,
"signal/frontier_coverage_25/group_std_mean": 0.13949446082115174,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001714956876821816,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001714956876821816,
"signal/frontier_coverage_5/centered_abs_mean": 0.12816164940595626,
"signal/frontier_coverage_5/group_std_mean": 0.18385762274265288,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002294093510136008,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002294093510136008,
"signal/frontier_ece_reward/centered_abs_mean": 0.018544533848762514,
"signal/frontier_ece_reward/group_std_mean": 0.02403351552784443,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002318066731095314,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002318066731095314,
"step": 80
},
{
"calibration/aurc": 0.23449193306557897,
"calibration/batch_distribution_entropy": 0.8591898174593183,
"calibration/buffer_distribution_entropy": 0.9250169156228909,
"calibration/confidence_entropy": 0.43118254354529395,
"calibration/coverage@0%": 0.013292608746136697,
"calibration/coverage@1%": 0.013292608746136697,
"calibration/coverage@10%": 0.0825480865481328,
"calibration/coverage@15%": 0.18059930829873508,
"calibration/coverage@20%": 0.2875147713491862,
"calibration/coverage@25%": 0.6253215245244499,
"calibration/coverage@30%": 0.8177006976439463,
"calibration/coverage@5%": 0.02068047154297047,
"calibration/ece": 0.12127799731225304,
"calibration/mean_confidence": 0.6915215553744556,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01597222222222223,
"completions/max_length": 3916.4,
"completions/max_terminated_length": 3916.4,
"completions/mean_length": 1137.9025146484375,
"completions/mean_terminated_length": 1156.4914306640626,
"completions/min_length": 0.0,
"completions/min_terminated_length": 301.6,
"epoch": 0.2039974500318746,
"grad_norm": 0.00035721127642318606,
"learning_rate": 3.7048192771084342e-06,
"loss": -0.0138,
"num_tokens": 247105860.0,
"reward": 1.0467980623245239,
"reward_std": 0.15480698943138121,
"rewards/accuracy_reward": 0.675000011920929,
"rewards/brier_reward": 0.7885265827178956,
"rewards/confidence_uniqueness_reward": 0.9201976656913757,
"rewards/format_reward": 0.9839409708976745,
"rewards/frontier_aurc_reward": -0.0019012054428458214,
"rewards/frontier_coverage_1": 0.018737619929015636,
"rewards/frontier_coverage_10": 0.018737619929015636,
"rewards/frontier_coverage_15": 0.018737619929015636,
"rewards/frontier_coverage_20": 0.018737619929015636,
"rewards/frontier_coverage_25": 0.020995143987238406,
"rewards/frontier_coverage_5": 0.018737619929015636,
"rewards/frontier_ece_reward": 0.013746128231287003,
"signal/accuracy_reward/centered_abs_mean": 0.18850911557674407,
"signal/accuracy_reward/group_std_mean": 0.243252757191658,
"signal/accuracy_reward/group_zero_std_frac": 0.325,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09425455778837204,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09425455778837204,
"signal/advantage_abs_mean": 0.11363825798034669,
"signal/advantage_pre_scale_abs_mean": 0.11363825798034669,
"signal/advantage_pre_scale_std": 0.18625059127807617,
"signal/advantage_std": 0.18625059127807617,
"signal/brier_reward/centered_abs_mean": 0.17142007052898406,
"signal/brier_reward/group_std_mean": 0.21921891272068023,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021427508816123007,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021427508816123007,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05906034857034683,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08878287822008132,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007382543571293354,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007382543571293354,
"signal/format_reward/centered_abs_mean": 0.02738172747194767,
"signal/format_reward/group_std_mean": 0.052156589925289154,
"signal/format_reward/group_zero_std_frac": 0.7833333492279053,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013690863735973835,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013690863735973835,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018494134768843652,
"signal/frontier_aurc_reward/group_std_mean": 0.0028435390442609785,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.310450192657299e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.310450192657299e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15455899834632875,
"signal/frontier_coverage_1/group_std_mean": 0.21880318522453307,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027666058391332625,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027666058391332625,
"signal/frontier_coverage_10/centered_abs_mean": 0.15455899834632875,
"signal/frontier_coverage_10/group_std_mean": 0.21880318522453307,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027666058391332625,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027666058391332625,
"signal/frontier_coverage_15/centered_abs_mean": 0.15455899834632875,
"signal/frontier_coverage_15/group_std_mean": 0.21880318522453307,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027666058391332625,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027666058391332625,
"signal/frontier_coverage_20/centered_abs_mean": 0.15455899834632875,
"signal/frontier_coverage_20/group_std_mean": 0.21880318522453307,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027666058391332625,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027666058391332625,
"signal/frontier_coverage_25/centered_abs_mean": 0.10806576907634735,
"signal/frontier_coverage_25/group_std_mean": 0.1546470195055008,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019343771506100892,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019343771506100892,
"signal/frontier_coverage_5/centered_abs_mean": 0.15455899834632875,
"signal/frontier_coverage_5/group_std_mean": 0.21880318522453307,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027666058391332625,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027666058391332625,
"signal/frontier_ece_reward/centered_abs_mean": 0.01972369700670242,
"signal/frontier_ece_reward/group_std_mean": 0.0256511677056551,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0024654621258378027,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0024654621258378027,
"step": 85
},
{
"calibration/aurc": 0.17547247118711584,
"calibration/batch_distribution_entropy": 0.8324117665264612,
"calibration/buffer_distribution_entropy": 0.9242962693143969,
"calibration/confidence_entropy": 0.41812509388979313,
"calibration/coverage@0%": 0.006274520525474006,
"calibration/coverage@1%": 0.006274520525474006,
"calibration/coverage@10%": 0.18623046036962448,
"calibration/coverage@15%": 0.5280255901645126,
"calibration/coverage@20%": 0.7003362423016254,
"calibration/coverage@25%": 0.8467375352104126,
"calibration/coverage@30%": 0.9238959714431413,
"calibration/coverage@5%": 0.006274520525474006,
"calibration/ece": 0.10362900900626756,
"calibration/mean_confidence": 0.6853092687555817,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017708333333333305,
"completions/max_length": 3909.4,
"completions/max_terminated_length": 3909.4,
"completions/mean_length": 1028.1794311523438,
"completions/mean_terminated_length": 1046.72275390625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 257.8,
"epoch": 0.2159973000337496,
"grad_norm": 0.00035051541635766625,
"learning_rate": 3.5542168674698798e-06,
"loss": -0.0144,
"num_tokens": 262019159.0,
"reward": 1.044158434867859,
"reward_std": 0.15149846374988557,
"rewards/accuracy_reward": 0.6759548544883728,
"rewards/brier_reward": 0.7803074479103088,
"rewards/confidence_uniqueness_reward": 0.9109542846679688,
"rewards/format_reward": 0.9822916626930237,
"rewards/frontier_aurc_reward": -0.002121423464268446,
"rewards/frontier_coverage_1": 0.01945815598592162,
"rewards/frontier_coverage_10": 0.01945815598592162,
"rewards/frontier_coverage_15": 0.01945815598592162,
"rewards/frontier_coverage_20": 0.01945815598592162,
"rewards/frontier_coverage_25": 0.02471369504928589,
"rewards/frontier_coverage_5": 0.01945815598592162,
"rewards/frontier_ece_reward": 0.011852136347442865,
"signal/accuracy_reward/centered_abs_mean": 0.1751681834459305,
"signal/accuracy_reward/group_std_mean": 0.229813551902771,
"signal/accuracy_reward/group_zero_std_frac": 0.3500000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08758409172296525,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08758409172296525,
"signal/advantage_abs_mean": 0.11029749214649201,
"signal/advantage_pre_scale_abs_mean": 0.11029749214649201,
"signal/advantage_pre_scale_std": 0.1847362846136093,
"signal/advantage_std": 0.1847362846136093,
"signal/brier_reward/centered_abs_mean": 0.17710019648075104,
"signal/brier_reward/group_std_mean": 0.225379142165184,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02213752456009388,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02213752456009388,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06355848461389542,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09241807758808136,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007944810576736927,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007944810576736927,
"signal/format_reward/centered_abs_mean": 0.02744140662252903,
"signal/format_reward/group_std_mean": 0.05012650415301323,
"signal/format_reward/group_zero_std_frac": 0.800000011920929,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013720703311264515,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013720703311264515,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019779345020651817,
"signal/frontier_aurc_reward/group_std_mean": 0.0030898852739483116,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5405028756940735e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5405028756940735e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15366960763931276,
"signal/frontier_coverage_1/group_std_mean": 0.2199167400598526,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027506859973073007,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027506859973073007,
"signal/frontier_coverage_10/centered_abs_mean": 0.15366960763931276,
"signal/frontier_coverage_10/group_std_mean": 0.2199167400598526,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027506859973073007,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027506859973073007,
"signal/frontier_coverage_15/centered_abs_mean": 0.15366960763931276,
"signal/frontier_coverage_15/group_std_mean": 0.2199167400598526,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027506859973073007,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027506859973073007,
"signal/frontier_coverage_20/centered_abs_mean": 0.15366960763931276,
"signal/frontier_coverage_20/group_std_mean": 0.2199167400598526,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027506859973073007,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027506859973073007,
"signal/frontier_coverage_25/centered_abs_mean": 0.10579841881990433,
"signal/frontier_coverage_25/group_std_mean": 0.1524827867746353,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018937916029244661,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018937916029244661,
"signal/frontier_coverage_5/centered_abs_mean": 0.15366960763931276,
"signal/frontier_coverage_5/group_std_mean": 0.2199167400598526,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027506859973073007,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027506859973073007,
"signal/frontier_ece_reward/centered_abs_mean": 0.019161980226635934,
"signal/frontier_ece_reward/group_std_mean": 0.025356636941432954,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002395247528329492,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002395247528329492,
"step": 90
},
{
"calibration/aurc": 0.21844268779520587,
"calibration/batch_distribution_entropy": 0.8795974563426677,
"calibration/buffer_distribution_entropy": 0.9255697608305195,
"calibration/confidence_entropy": 0.43751562875025146,
"calibration/coverage@0%": 0.012130314557219657,
"calibration/coverage@1%": 0.012130314557219657,
"calibration/coverage@10%": 0.14825596900748145,
"calibration/coverage@15%": 0.3928766365487585,
"calibration/coverage@20%": 0.602737623456852,
"calibration/coverage@25%": 0.6771237675480899,
"calibration/coverage@30%": 0.7594675782497744,
"calibration/coverage@5%": 0.03516696377187935,
"calibration/ece": 0.13509608069340767,
"calibration/mean_confidence": 0.6490796523758365,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012586805555555558,
"completions/max_length": 3796.2,
"completions/max_terminated_length": 3796.2,
"completions/mean_length": 1077.1295166015625,
"completions/mean_terminated_length": 1090.8811279296874,
"completions/min_length": 0.0,
"completions/min_terminated_length": 270.6,
"epoch": 0.22799715003562457,
"grad_norm": 0.0004233669606037438,
"learning_rate": 3.4036144578313257e-06,
"loss": -0.012,
"num_tokens": 277519371.0,
"reward": 1.0404303312301635,
"reward_std": 0.14202898740768433,
"rewards/accuracy_reward": 0.6548611044883728,
"rewards/brier_reward": 0.7843343019485474,
"rewards/confidence_uniqueness_reward": 0.9325709342956543,
"rewards/format_reward": 0.9874131917953491,
"rewards/frontier_aurc_reward": -0.001874490245245397,
"rewards/frontier_coverage_1": 0.030402445048093796,
"rewards/frontier_coverage_10": 0.030402445048093796,
"rewards/frontier_coverage_15": 0.030402445048093796,
"rewards/frontier_coverage_20": 0.030402445048093796,
"rewards/frontier_coverage_25": 0.03526088930666447,
"rewards/frontier_coverage_5": 0.030402445048093796,
"rewards/frontier_ece_reward": 0.01089109033346176,
"signal/accuracy_reward/centered_abs_mean": 0.16573350727558137,
"signal/accuracy_reward/group_std_mean": 0.2209865093231201,
"signal/accuracy_reward/group_zero_std_frac": 0.37222222685813905,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08286675363779068,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08286675363779068,
"signal/advantage_abs_mean": 0.10206114500761032,
"signal/advantage_pre_scale_abs_mean": 0.10206114500761032,
"signal/advantage_pre_scale_std": 0.17137506306171418,
"signal/advantage_std": 0.17137506306171418,
"signal/brier_reward/centered_abs_mean": 0.17366032898426056,
"signal/brier_reward/group_std_mean": 0.2203914701938629,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02170754112303257,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02170754112303257,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04881677031517029,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07638464868068695,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006102096289396286,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006102096289396286,
"signal/format_reward/centered_abs_mean": 0.022520615719258785,
"signal/format_reward/group_std_mean": 0.04575785622000694,
"signal/format_reward/group_zero_std_frac": 0.8027777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011260307859629393,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011260307859629393,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016592080472037196,
"signal/frontier_aurc_reward/group_std_mean": 0.002632213244214654,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9699822334805503e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9699822334805503e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1714262396097183,
"signal/frontier_coverage_1/group_std_mean": 0.23666555285453797,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030685296282172204,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030685296282172204,
"signal/frontier_coverage_10/centered_abs_mean": 0.1714262396097183,
"signal/frontier_coverage_10/group_std_mean": 0.23666555285453797,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030685296282172204,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030685296282172204,
"signal/frontier_coverage_15/centered_abs_mean": 0.1714262396097183,
"signal/frontier_coverage_15/group_std_mean": 0.23666555285453797,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030685296282172204,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030685296282172204,
"signal/frontier_coverage_20/centered_abs_mean": 0.1714262396097183,
"signal/frontier_coverage_20/group_std_mean": 0.23666555285453797,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030685296282172204,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030685296282172204,
"signal/frontier_coverage_25/centered_abs_mean": 0.11292467564344406,
"signal/frontier_coverage_25/group_std_mean": 0.15729531347751619,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020213516661897303,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020213516661897303,
"signal/frontier_coverage_5/centered_abs_mean": 0.1714262396097183,
"signal/frontier_coverage_5/group_std_mean": 0.23666555285453797,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030685296282172204,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030685296282172204,
"signal/frontier_ece_reward/centered_abs_mean": 0.018675522133708,
"signal/frontier_ece_reward/group_std_mean": 0.024996720254421234,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023344402667135,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023344402667135,
"step": 95
},
{
"calibration/aurc": 0.19269483529461973,
"calibration/batch_distribution_entropy": 0.9275256037664482,
"calibration/buffer_distribution_entropy": 0.926590680796312,
"calibration/confidence_entropy": 0.447337380697335,
"calibration/coverage@0%": 0.01488082526195049,
"calibration/coverage@1%": 0.01488082526195049,
"calibration/coverage@10%": 0.16575615682693723,
"calibration/coverage@15%": 0.3623856419311955,
"calibration/coverage@20%": 0.5321022617479968,
"calibration/coverage@25%": 0.8242483211397911,
"calibration/coverage@30%": 0.9162873399715504,
"calibration/coverage@5%": 0.062095679373356326,
"calibration/ece": 0.14211453397091672,
"calibration/mean_confidence": 0.6130730250809763,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01675347222222221,
"completions/max_length": 3890.2,
"completions/max_terminated_length": 3890.2,
"completions/mean_length": 1112.4980224609376,
"completions/mean_terminated_length": 1131.3287841796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 300.8,
"epoch": 0.23999700003749952,
"grad_norm": 0.0003026402264367789,
"learning_rate": 3.2530120481927713e-06,
"loss": -0.0142,
"num_tokens": 293434420.0,
"reward": 1.0494256734848022,
"reward_std": 0.14548470377922057,
"rewards/accuracy_reward": 0.6754340291023254,
"rewards/brier_reward": 0.7901706337928772,
"rewards/confidence_uniqueness_reward": 0.9355661392211914,
"rewards/format_reward": 0.9832465171813964,
"rewards/frontier_aurc_reward": -0.0016389565775170923,
"rewards/frontier_coverage_1": 0.026069404324516654,
"rewards/frontier_coverage_10": 0.026069404324516654,
"rewards/frontier_coverage_15": 0.026069404324516654,
"rewards/frontier_coverage_20": 0.026069404324516654,
"rewards/frontier_coverage_25": 0.03032403439283371,
"rewards/frontier_coverage_5": 0.026069404324516654,
"rewards/frontier_ece_reward": 0.012173208221793175,
"signal/accuracy_reward/centered_abs_mean": 0.17470160722732545,
"signal/accuracy_reward/group_std_mean": 0.229377481341362,
"signal/accuracy_reward/group_zero_std_frac": 0.3500000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08735080361366272,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08735080361366272,
"signal/advantage_abs_mean": 0.10435573160648345,
"signal/advantage_pre_scale_abs_mean": 0.10435573160648345,
"signal/advantage_pre_scale_std": 0.17683197557926178,
"signal/advantage_std": 0.17683197557926178,
"signal/brier_reward/centered_abs_mean": 0.17027658522129058,
"signal/brier_reward/group_std_mean": 0.21771090626716613,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021284573152661322,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021284573152661322,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04709148705005646,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07536679804325104,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0058864358812570575,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0058864358812570575,
"signal/format_reward/centered_abs_mean": 0.02706705704331398,
"signal/format_reward/group_std_mean": 0.05143220648169518,
"signal/format_reward/group_zero_std_frac": 0.7888889074325561,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01353352852165699,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01353352852165699,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016033690189942718,
"signal/frontier_aurc_reward/group_std_mean": 0.002564445650205016,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8700304392259567e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8700304392259567e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18503127694129945,
"signal/frontier_coverage_1/group_std_mean": 0.2500757068395615,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003312059724703431,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003312059724703431,
"signal/frontier_coverage_10/centered_abs_mean": 0.18503127694129945,
"signal/frontier_coverage_10/group_std_mean": 0.2500757068395615,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003312059724703431,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003312059724703431,
"signal/frontier_coverage_15/centered_abs_mean": 0.18503127694129945,
"signal/frontier_coverage_15/group_std_mean": 0.2500757068395615,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003312059724703431,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003312059724703431,
"signal/frontier_coverage_20/centered_abs_mean": 0.18503127694129945,
"signal/frontier_coverage_20/group_std_mean": 0.2500757068395615,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003312059724703431,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003312059724703431,
"signal/frontier_coverage_25/centered_abs_mean": 0.11901406049728394,
"signal/frontier_coverage_25/group_std_mean": 0.16131974160671234,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002130351681262255,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002130351681262255,
"signal/frontier_coverage_5/centered_abs_mean": 0.18503127694129945,
"signal/frontier_coverage_5/group_std_mean": 0.2500757068395615,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003312059724703431,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003312059724703431,
"signal/frontier_ece_reward/centered_abs_mean": 0.0202214565128088,
"signal/frontier_ece_reward/group_std_mean": 0.026237889006733895,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025276820641011,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025276820641011,
"step": 100
},
{
"epoch": 0.23999700003749952,
"eval_calibration/aurc": 0.14377964715819236,
"eval_calibration/batch_distribution_entropy": 0.8031513733746348,
"eval_calibration/buffer_distribution_entropy": 0.928078833996445,
"eval_calibration/confidence_entropy": 0.39359989336584955,
"eval_calibration/coverage@0%": 0.23172043010752688,
"eval_calibration/coverage@1%": 0.23172043010752688,
"eval_calibration/coverage@10%": 0.44854390681003586,
"eval_calibration/coverage@15%": 0.6677643369175628,
"eval_calibration/coverage@20%": 0.7693212365591399,
"eval_calibration/coverage@25%": 0.8956989247311827,
"eval_calibration/coverage@30%": 0.9555555555555556,
"eval_calibration/coverage@5%": 0.23172043010752688,
"eval_calibration/ece": 0.215171482975084,
"eval_calibration/mean_confidence": 0.6850989023299228,
"eval_completions/clipped_ratio": 0.021527777777777795,
"eval_completions/max_length": 3111.1666666666665,
"eval_completions/max_terminated_length": 3111.1666666666665,
"eval_completions/mean_length": 1071.9295450846355,
"eval_completions/mean_terminated_length": 1095.677001953125,
"eval_completions/min_length": 0.0,
"eval_completions/min_terminated_length": 372.3333333333333,
"eval_loss": 0.0,
"eval_num_tokens": 293434420.0,
"eval_reward": 1.0389058788617451,
"eval_reward_std": 0.2671206171313922,
"eval_rewards/accuracy_reward": 0.671006957689921,
"eval_rewards/brier_reward": 0.7895856300989786,
"eval_rewards/confidence_uniqueness_reward": 0.8775778909524282,
"eval_rewards/format_reward": 0.9800347288449606,
"eval_rewards/frontier_aurc_reward": -0.001799254697592308,
"eval_rewards/frontier_coverage_1": 0.030476751853711903,
"eval_rewards/frontier_coverage_10": 0.030476751853711903,
"eval_rewards/frontier_coverage_15": 0.030476751853711903,
"eval_rewards/frontier_coverage_20": 0.030476751853711903,
"eval_rewards/frontier_coverage_25": 0.039223356172442436,
"eval_rewards/frontier_coverage_5": 0.030476751853711903,
"eval_rewards/frontier_ece_reward": 0.012736255613466104,
"eval_runtime": 218.0582,
"eval_samples_per_second": 4.586,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4236653645833333,
"eval_signal/accuracy_reward/group_std_mean": 0.4662252912918727,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21183268229166666,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21183268229166666,
"eval_signal/advantage_abs_mean": 0.22447845339775085,
"eval_signal/advantage_pre_scale_abs_mean": 0.22447845339775085,
"eval_signal/advantage_pre_scale_std": 0.26788055896759033,
"eval_signal/advantage_std": 0.26788055896759033,
"eval_signal/brier_reward/centered_abs_mean": 0.24005225549141565,
"eval_signal/brier_reward/group_std_mean": 0.2984655201435089,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030006531936426956,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.030006531936426956,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0669537124534448,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.11659604435165723,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0083692140566806,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0083692140566806,
"eval_signal/format_reward/centered_abs_mean": 0.037923177083333336,
"eval_signal/format_reward/group_std_mean": 0.09447787639995416,
"eval_signal/format_reward/group_zero_std_frac": 0.5277777860562006,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.018961588541666668,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.018961588541666668,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0027151394557828703,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004679826592716078,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8600995190402806e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8600995190402806e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.21269922455151877,
"eval_signal/frontier_coverage_1/group_std_mean": 0.33839886883894604,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003807315952144563,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003807315952144563,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.21269922455151877,
"eval_signal/frontier_coverage_10/group_std_mean": 0.33839886883894604,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003807315952144563,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003807315952144563,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.21269922455151877,
"eval_signal/frontier_coverage_15/group_std_mean": 0.33839886883894604,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003807315952144563,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003807315952144563,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.21269922455151877,
"eval_signal/frontier_coverage_20/group_std_mean": 0.33839886883894604,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003807315952144563,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003807315952144563,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.1346950319906076,
"eval_signal/frontier_coverage_25/group_std_mean": 0.2107120007276535,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024110410595312715,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024110410595312715,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.21269922455151877,
"eval_signal/frontier_coverage_5/group_std_mean": 0.33839886883894604,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003807315952144563,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003807315952144563,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.022523170647521813,
"eval_signal/frontier_ece_reward/group_std_mean": 0.030609419258932274,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0028153963309402266,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0028153963309402266,
"eval_steps_per_second": 0.028,
"step": 100
},
{
"calibration/aurc": 0.3135478311696943,
"calibration/batch_distribution_entropy": 0.8595469934829717,
"calibration/buffer_distribution_entropy": 0.9278512946088252,
"calibration/confidence_entropy": 0.3974191602109486,
"calibration/coverage@0%": 0.013829787234042554,
"calibration/coverage@1%": 0.013829787234042554,
"calibration/coverage@10%": 0.14787234042553193,
"calibration/coverage@15%": 0.17220483938256154,
"calibration/coverage@20%": 0.20299209956745323,
"calibration/coverage@25%": 0.32088064824285356,
"calibration/coverage@30%": 0.48956564348977183,
"calibration/coverage@5%": 0.12340425531914893,
"calibration/ece": 0.1898765576679796,
"calibration/mean_confidence": 0.6748698600578947,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.020486111111111094,
"completions/max_length": 3933.4,
"completions/max_terminated_length": 3933.4,
"completions/mean_length": 1077.4388061523437,
"completions/mean_terminated_length": 1100.0875732421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 255.6,
"epoch": 0.2519968500393745,
"grad_norm": 0.00034545789822004735,
"learning_rate": 3.1024096385542172e-06,
"loss": -0.0174,
"num_tokens": 308923379.0,
"reward": 1.04494047164917,
"reward_std": 0.1398667186498642,
"rewards/accuracy_reward": 0.6754340171813965,
"rewards/brier_reward": 0.782523512840271,
"rewards/confidence_uniqueness_reward": 0.9232323408126831,
"rewards/format_reward": 0.9793402671813964,
"rewards/frontier_aurc_reward": -0.0020483172265812755,
"rewards/frontier_coverage_1": 0.024506374448537826,
"rewards/frontier_coverage_10": 0.024506374448537826,
"rewards/frontier_coverage_15": 0.024506374448537826,
"rewards/frontier_coverage_20": 0.024506374448537826,
"rewards/frontier_coverage_25": 0.028536751121282577,
"rewards/frontier_coverage_5": 0.024506374448537826,
"rewards/frontier_ece_reward": 0.013331157714128494,
"signal/accuracy_reward/centered_abs_mean": 0.15463867038488388,
"signal/accuracy_reward/group_std_mean": 0.20896627902984619,
"signal/accuracy_reward/group_zero_std_frac": 0.39722222089767456,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07731933519244194,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07731933519244194,
"signal/advantage_abs_mean": 0.0999849259853363,
"signal/advantage_pre_scale_abs_mean": 0.0999849259853363,
"signal/advantage_pre_scale_std": 0.17430230379104614,
"signal/advantage_std": 0.17430230379104614,
"signal/brier_reward/centered_abs_mean": 0.16766727864742278,
"signal/brier_reward/group_std_mean": 0.21422589123249053,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020958409830927848,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020958409830927848,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05548132359981537,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0828926458954811,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006935165449976921,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006935165449976921,
"signal/format_reward/centered_abs_mean": 0.03021918348968029,
"signal/format_reward/group_std_mean": 0.05192293673753738,
"signal/format_reward/group_zero_std_frac": 0.8,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.015109591744840145,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.015109591744840145,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020640650764107704,
"signal/frontier_aurc_reward/group_std_mean": 0.0032852147705852985,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6946763793821445e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6946763793821445e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16248934268951415,
"signal/frontier_coverage_1/group_std_mean": 0.22402643859386445,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002908559050410986,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002908559050410986,
"signal/frontier_coverage_10/centered_abs_mean": 0.16248934268951415,
"signal/frontier_coverage_10/group_std_mean": 0.22402643859386445,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002908559050410986,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002908559050410986,
"signal/frontier_coverage_15/centered_abs_mean": 0.16248934268951415,
"signal/frontier_coverage_15/group_std_mean": 0.22402643859386445,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002908559050410986,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002908559050410986,
"signal/frontier_coverage_20/centered_abs_mean": 0.16248934268951415,
"signal/frontier_coverage_20/group_std_mean": 0.22402643859386445,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002908559050410986,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002908559050410986,
"signal/frontier_coverage_25/centered_abs_mean": 0.10017142742872238,
"signal/frontier_coverage_25/group_std_mean": 0.13701665103435517,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017930685309693218,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017930685309693218,
"signal/frontier_coverage_5/centered_abs_mean": 0.16248934268951415,
"signal/frontier_coverage_5/group_std_mean": 0.22402643859386445,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002908559050410986,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002908559050410986,
"signal/frontier_ece_reward/centered_abs_mean": 0.020253218337893487,
"signal/frontier_ece_reward/group_std_mean": 0.025626911595463753,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002531652292236686,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002531652292236686,
"step": 105
},
{
"calibration/aurc": 0.18315490609084778,
"calibration/batch_distribution_entropy": 0.7765032894890078,
"calibration/buffer_distribution_entropy": 0.9242474732058756,
"calibration/confidence_entropy": 0.3775560392498729,
"calibration/coverage@0%": 0.06752858081031457,
"calibration/coverage@1%": 0.0759496334418935,
"calibration/coverage@10%": 0.28782259073587924,
"calibration/coverage@15%": 0.36077427800708595,
"calibration/coverage@20%": 0.5508072448753857,
"calibration/coverage@25%": 0.6846715225109482,
"calibration/coverage@30%": 0.7913718321314601,
"calibration/coverage@5%": 0.23705855547960814,
"calibration/ece": 0.14042555442163512,
"calibration/mean_confidence": 0.7471171009304948,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014670138888888884,
"completions/max_length": 3876.6,
"completions/max_terminated_length": 3876.6,
"completions/mean_length": 1131.8022705078124,
"completions/mean_terminated_length": 1148.7941162109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 295.6,
"epoch": 0.2639967000412495,
"grad_norm": 0.00035962072433903813,
"learning_rate": 2.9518072289156627e-06,
"loss": -0.0127,
"num_tokens": 325070189.0,
"reward": 1.0619823932647705,
"reward_std": 0.14728475213050843,
"rewards/accuracy_reward": 0.6998263955116272,
"rewards/brier_reward": 0.8028295993804931,
"rewards/confidence_uniqueness_reward": 0.9204535126686096,
"rewards/format_reward": 0.9849826335906983,
"rewards/frontier_aurc_reward": -0.002051501488313079,
"rewards/frontier_coverage_1": 0.020819610450416803,
"rewards/frontier_coverage_10": 0.020819610450416803,
"rewards/frontier_coverage_15": 0.020819610450416803,
"rewards/frontier_coverage_20": 0.020819610450416803,
"rewards/frontier_coverage_25": 0.03544456548988819,
"rewards/frontier_coverage_5": 0.020819610450416803,
"rewards/frontier_ece_reward": 0.01365122813731432,
"signal/accuracy_reward/centered_abs_mean": 0.16985676884651185,
"signal/accuracy_reward/group_std_mean": 0.22959282100200654,
"signal/accuracy_reward/group_zero_std_frac": 0.32777778506278993,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08492838442325593,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08492838442325593,
"signal/advantage_abs_mean": 0.10342257767915726,
"signal/advantage_pre_scale_abs_mean": 0.10342257767915726,
"signal/advantage_pre_scale_std": 0.18322791755199433,
"signal/advantage_std": 0.18322791755199433,
"signal/brier_reward/centered_abs_mean": 0.1463008463382721,
"signal/brier_reward/group_std_mean": 0.19255680441856385,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01828760579228401,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01828760579228401,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0545014426112175,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08425513207912445,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006812680326402187,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006812680326402187,
"signal/format_reward/centered_abs_mean": 0.0259168840944767,
"signal/format_reward/group_std_mean": 0.050569449365139005,
"signal/format_reward/group_zero_std_frac": 0.7833333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01295844204723835,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01295844204723835,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021926365327090023,
"signal/frontier_aurc_reward/group_std_mean": 0.003540871059522033,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.924819320673123e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.924819320673123e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1195900097489357,
"signal/frontier_coverage_1/group_std_mean": 0.17233213186264038,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021406610263511538,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021406610263511538,
"signal/frontier_coverage_10/centered_abs_mean": 0.1195900097489357,
"signal/frontier_coverage_10/group_std_mean": 0.17233213186264038,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021406610263511538,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021406610263511538,
"signal/frontier_coverage_15/centered_abs_mean": 0.1195900097489357,
"signal/frontier_coverage_15/group_std_mean": 0.17233213186264038,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021406610263511538,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021406610263511538,
"signal/frontier_coverage_20/centered_abs_mean": 0.1195900097489357,
"signal/frontier_coverage_20/group_std_mean": 0.17233213186264038,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021406610263511538,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021406610263511538,
"signal/frontier_coverage_25/centered_abs_mean": 0.07210812345147133,
"signal/frontier_coverage_25/group_std_mean": 0.10068325251340866,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012907354161143304,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012907354161143304,
"signal/frontier_coverage_5/centered_abs_mean": 0.1195900097489357,
"signal/frontier_coverage_5/group_std_mean": 0.17233213186264038,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021406610263511538,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021406610263511538,
"signal/frontier_ece_reward/centered_abs_mean": 0.0171145960688591,
"signal/frontier_ece_reward/group_std_mean": 0.021544494852423667,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021393245086073877,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021393245086073877,
"step": 110
},
{
"calibration/aurc": 0.29120583351889173,
"calibration/batch_distribution_entropy": 0.7863150995666791,
"calibration/buffer_distribution_entropy": 0.9207539489185862,
"calibration/confidence_entropy": 0.4187937868082977,
"calibration/coverage@0%": 0.0075278531686347825,
"calibration/coverage@1%": 0.0075278531686347825,
"calibration/coverage@10%": 0.06411135680169563,
"calibration/coverage@15%": 0.1510769604728217,
"calibration/coverage@20%": 0.3940149695408244,
"calibration/coverage@25%": 0.6425000196232538,
"calibration/coverage@30%": 0.6874048087618722,
"calibration/coverage@5%": 0.0075278531686347825,
"calibration/ece": 0.18694521114390877,
"calibration/mean_confidence": 0.748259641465957,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01684027777777779,
"completions/max_length": 3995.0,
"completions/max_terminated_length": 3995.0,
"completions/mean_length": 1128.9096435546876,
"completions/mean_terminated_length": 1148.427880859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 324.2,
"epoch": 0.27599655004312446,
"grad_norm": 0.00030474571394734085,
"learning_rate": 2.8012048192771087e-06,
"loss": -0.0141,
"num_tokens": 341154428.0,
"reward": 1.0459946870803833,
"reward_std": 0.14044857025146484,
"rewards/accuracy_reward": 0.6740451335906983,
"rewards/brier_reward": 0.7880466699600219,
"rewards/confidence_uniqueness_reward": 0.9210368752479553,
"rewards/format_reward": 0.9829861044883728,
"rewards/frontier_aurc_reward": -0.002286965842358768,
"rewards/frontier_coverage_1": 0.022654338832944633,
"rewards/frontier_coverage_10": 0.022654338832944633,
"rewards/frontier_coverage_15": 0.022654338832944633,
"rewards/frontier_coverage_20": 0.022654338832944633,
"rewards/frontier_coverage_25": 0.030701416730880737,
"rewards/frontier_coverage_5": 0.022654338832944633,
"rewards/frontier_ece_reward": 0.010459364019334316,
"signal/accuracy_reward/centered_abs_mean": 0.16135525107383727,
"signal/accuracy_reward/group_std_mean": 0.21080092787742616,
"signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08067762553691864,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08067762553691864,
"signal/advantage_abs_mean": 0.10324146151542664,
"signal/advantage_pre_scale_abs_mean": 0.10324146151542664,
"signal/advantage_pre_scale_std": 0.17995203137397767,
"signal/advantage_std": 0.17995203137397767,
"signal/brier_reward/centered_abs_mean": 0.1440261572599411,
"signal/brier_reward/group_std_mean": 0.1866879642009735,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01800326965749264,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01800326965749264,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0526352696120739,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07682174444198608,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006579408701509237,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006579408701509237,
"signal/format_reward/centered_abs_mean": 0.02632378451526165,
"signal/format_reward/group_std_mean": 0.045665005967020986,
"signal/format_reward/group_zero_std_frac": 0.8277777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013161892257630824,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013161892257630824,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021879581967368723,
"signal/frontier_aurc_reward/group_std_mean": 0.0034705805126577617,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9164449844975024e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9164449844975024e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.10926453918218612,
"signal/frontier_coverage_1/group_std_mean": 0.15879356861114502,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019558351254090668,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019558351254090668,
"signal/frontier_coverage_10/centered_abs_mean": 0.10926453918218612,
"signal/frontier_coverage_10/group_std_mean": 0.15879356861114502,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019558351254090668,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019558351254090668,
"signal/frontier_coverage_15/centered_abs_mean": 0.10926453918218612,
"signal/frontier_coverage_15/group_std_mean": 0.15879356861114502,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019558351254090668,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019558351254090668,
"signal/frontier_coverage_20/centered_abs_mean": 0.10926453918218612,
"signal/frontier_coverage_20/group_std_mean": 0.15879356861114502,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019558351254090668,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019558351254090668,
"signal/frontier_coverage_25/centered_abs_mean": 0.06192091777920723,
"signal/frontier_coverage_25/group_std_mean": 0.08779880106449127,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001108384388498962,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001108384388498962,
"signal/frontier_coverage_5/centered_abs_mean": 0.10926453918218612,
"signal/frontier_coverage_5/group_std_mean": 0.15879356861114502,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019558351254090668,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019558351254090668,
"signal/frontier_ece_reward/centered_abs_mean": 0.015209457091987132,
"signal/frontier_ece_reward/group_std_mean": 0.019499244540929793,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019011821364983915,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019011821364983915,
"step": 115
},
{
"calibration/aurc": 0.29482160200693,
"calibration/batch_distribution_entropy": 0.8057582916578456,
"calibration/buffer_distribution_entropy": 0.9189735844921059,
"calibration/confidence_entropy": 0.41709737555410004,
"calibration/coverage@0%": 0.028767226550989223,
"calibration/coverage@1%": 0.028767226550989223,
"calibration/coverage@10%": 0.11131487154184057,
"calibration/coverage@15%": 0.23079975514591305,
"calibration/coverage@20%": 0.31510173209402237,
"calibration/coverage@25%": 0.5533219287673521,
"calibration/coverage@30%": 0.5847659094969149,
"calibration/coverage@5%": 0.053310307490937,
"calibration/ece": 0.16388524171934898,
"calibration/mean_confidence": 0.7294418950441234,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 4023.4,
"completions/max_terminated_length": 4023.4,
"completions/mean_length": 1133.4726806640624,
"completions/mean_terminated_length": 1146.8188232421876,
"completions/min_length": 0.0,
"completions/min_terminated_length": 325.2,
"epoch": 0.28799640004499943,
"grad_norm": 0.00026670683291740716,
"learning_rate": 2.6506024096385547e-06,
"loss": -0.0099,
"num_tokens": 357293889.0,
"reward": 1.055515742301941,
"reward_std": 0.13213830143213273,
"rewards/accuracy_reward": 0.6822048664093018,
"rewards/brier_reward": 0.7987765789031982,
"rewards/confidence_uniqueness_reward": 0.9297965288162231,
"rewards/format_reward": 0.9881076335906982,
"rewards/frontier_aurc_reward": -0.0020097248489037156,
"rewards/frontier_coverage_1": 0.027725940570235252,
"rewards/frontier_coverage_10": 0.027725940570235252,
"rewards/frontier_coverage_15": 0.027725940570235252,
"rewards/frontier_coverage_20": 0.027725940570235252,
"rewards/frontier_coverage_25": 0.031627381592988967,
"rewards/frontier_coverage_5": 0.027725940570235252,
"rewards/frontier_ece_reward": 0.010209777392446995,
"signal/accuracy_reward/centered_abs_mean": 0.15929362177848816,
"signal/accuracy_reward/group_std_mean": 0.20812316238880157,
"signal/accuracy_reward/group_zero_std_frac": 0.41666666865348817,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07964681088924408,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07964681088924408,
"signal/advantage_abs_mean": 0.09782680720090867,
"signal/advantage_pre_scale_abs_mean": 0.09782680720090867,
"signal/advantage_pre_scale_std": 0.1714523106813431,
"signal/advantage_std": 0.1714523106813431,
"signal/brier_reward/centered_abs_mean": 0.14243703782558442,
"signal/brier_reward/group_std_mean": 0.1831719845533371,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017804629728198053,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017804629728198053,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.043136756867170334,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06378482431173324,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005392094608396292,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005392094608396292,
"signal/format_reward/centered_abs_mean": 0.01975368931889534,
"signal/format_reward/group_std_mean": 0.03594511151313782,
"signal/format_reward/group_zero_std_frac": 0.8555555462837219,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00987684465944767,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00987684465944767,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019732348155230284,
"signal/frontier_aurc_reward/group_std_mean": 0.003135677380487323,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.532090340740979e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.532090340740979e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.11823989003896714,
"signal/frontier_coverage_1/group_std_mean": 0.16819217205047607,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021164938574656845,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021164938574656845,
"signal/frontier_coverage_10/centered_abs_mean": 0.11823989003896714,
"signal/frontier_coverage_10/group_std_mean": 0.16819217205047607,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021164938574656845,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021164938574656845,
"signal/frontier_coverage_15/centered_abs_mean": 0.11823989003896714,
"signal/frontier_coverage_15/group_std_mean": 0.16819217205047607,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021164938574656845,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021164938574656845,
"signal/frontier_coverage_20/centered_abs_mean": 0.11823989003896714,
"signal/frontier_coverage_20/group_std_mean": 0.16819217205047607,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021164938574656845,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021164938574656845,
"signal/frontier_coverage_25/centered_abs_mean": 0.058430235087871554,
"signal/frontier_coverage_25/group_std_mean": 0.08083060085773468,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010459011886268855,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010459011886268855,
"signal/frontier_coverage_5/centered_abs_mean": 0.11823989003896714,
"signal/frontier_coverage_5/group_std_mean": 0.16819217205047607,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021164938574656845,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021164938574656845,
"signal/frontier_ece_reward/centered_abs_mean": 0.015391989797353744,
"signal/frontier_ece_reward/group_std_mean": 0.019800475612282754,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001923998724669218,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001923998724669218,
"step": 120
},
{
"calibration/aurc": 0.16268769554340473,
"calibration/batch_distribution_entropy": 0.7832053317397112,
"calibration/buffer_distribution_entropy": 0.9172995538320976,
"calibration/confidence_entropy": 0.4107882901625729,
"calibration/coverage@0%": 0.014583333333333332,
"calibration/coverage@1%": 0.014583333333333332,
"calibration/coverage@10%": 0.337894364664926,
"calibration/coverage@15%": 0.412960021052384,
"calibration/coverage@20%": 0.7071476371933291,
"calibration/coverage@25%": 0.8396603979959071,
"calibration/coverage@30%": 0.9393918918918919,
"calibration/coverage@5%": 0.1550397084421236,
"calibration/ece": 0.09877240124714463,
"calibration/mean_confidence": 0.7348249590106904,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010243055555555537,
"completions/max_length": 3864.6,
"completions/max_terminated_length": 3864.6,
"completions/mean_length": 1138.81005859375,
"completions/mean_terminated_length": 1150.768701171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 324.8,
"epoch": 0.2999962500468744,
"grad_norm": 0.0003056655405089259,
"learning_rate": 2.5e-06,
"loss": -0.0101,
"num_tokens": 373530645.0,
"reward": 1.0652480840682983,
"reward_std": 0.1264243721961975,
"rewards/accuracy_reward": 0.6951388955116272,
"rewards/brier_reward": 0.8122022986412049,
"rewards/confidence_uniqueness_reward": 0.9318351149559021,
"rewards/format_reward": 0.9897569417953491,
"rewards/frontier_aurc_reward": -0.0017485103104263543,
"rewards/frontier_coverage_1": 0.03179278327152133,
"rewards/frontier_coverage_10": 0.03179278327152133,
"rewards/frontier_coverage_15": 0.03179278327152133,
"rewards/frontier_coverage_20": 0.03179278327152133,
"rewards/frontier_coverage_25": 0.03835425637662411,
"rewards/frontier_coverage_5": 0.03179278327152133,
"rewards/frontier_ece_reward": 0.010358355939388275,
"signal/accuracy_reward/centered_abs_mean": 0.15129123330116273,
"signal/accuracy_reward/group_std_mean": 0.20266271233558655,
"signal/accuracy_reward/group_zero_std_frac": 0.4166666746139526,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07564561665058137,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07564561665058137,
"signal/advantage_abs_mean": 0.09205293953418732,
"signal/advantage_pre_scale_abs_mean": 0.09205293953418732,
"signal/advantage_pre_scale_std": 0.16289040446281433,
"signal/advantage_std": 0.16289040446281433,
"signal/brier_reward/centered_abs_mean": 0.1405400887131691,
"signal/brier_reward/group_std_mean": 0.18279159367084502,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017567511089146136,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017567511089146136,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04126947373151779,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06011983305215836,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005158684216439724,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005158684216439724,
"signal/format_reward/centered_abs_mean": 0.01725260429084301,
"signal/format_reward/group_std_mean": 0.03177933469414711,
"signal/format_reward/group_zero_std_frac": 0.8722222328186036,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008626302145421505,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008626302145421505,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018054211512207984,
"signal/frontier_aurc_reward/group_std_mean": 0.0029377146624028684,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.231703594792634e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.231703594792634e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12659138143062593,
"signal/frontier_coverage_1/group_std_mean": 0.18150453865528107,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022659855661913753,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022659855661913753,
"signal/frontier_coverage_10/centered_abs_mean": 0.12659138143062593,
"signal/frontier_coverage_10/group_std_mean": 0.18150453865528107,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022659855661913753,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022659855661913753,
"signal/frontier_coverage_15/centered_abs_mean": 0.12659138143062593,
"signal/frontier_coverage_15/group_std_mean": 0.18150453865528107,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022659855661913753,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022659855661913753,
"signal/frontier_coverage_20/centered_abs_mean": 0.12659138143062593,
"signal/frontier_coverage_20/group_std_mean": 0.18150453865528107,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022659855661913753,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022659855661913753,
"signal/frontier_coverage_25/centered_abs_mean": 0.06125762164592743,
"signal/frontier_coverage_25/group_std_mean": 0.08552575558423996,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001096511399373412,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001096511399373412,
"signal/frontier_coverage_5/centered_abs_mean": 0.12659138143062593,
"signal/frontier_coverage_5/group_std_mean": 0.18150453865528107,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022659855661913753,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022659855661913753,
"signal/frontier_ece_reward/centered_abs_mean": 0.01536604668945074,
"signal/frontier_ece_reward/group_std_mean": 0.019951780140399934,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019207558361813426,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019207558361813426,
"step": 125
},
{
"calibration/aurc": 0.21656307838282568,
"calibration/batch_distribution_entropy": 0.8418151261521143,
"calibration/buffer_distribution_entropy": 0.9155357951937564,
"calibration/confidence_entropy": 0.4046419839826144,
"calibration/coverage@0%": 0.031999336915829756,
"calibration/coverage@1%": 0.031999336915829756,
"calibration/coverage@10%": 0.26968682939463356,
"calibration/coverage@15%": 0.3515532872272156,
"calibration/coverage@20%": 0.4407191523101236,
"calibration/coverage@25%": 0.5056258584505888,
"calibration/coverage@30%": 0.8617951969687244,
"calibration/coverage@5%": 0.03622097279973477,
"calibration/ece": 0.10678372431681085,
"calibration/mean_confidence": 0.6637441296139539,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017187499999999977,
"completions/max_length": 3903.2,
"completions/max_terminated_length": 3903.2,
"completions/mean_length": 1170.849853515625,
"completions/mean_terminated_length": 1191.54580078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 298.4,
"epoch": 0.3119961000487494,
"grad_norm": 0.0003094042185693979,
"learning_rate": 2.349397590361446e-06,
"loss": -0.0146,
"num_tokens": 390143635.0,
"reward": 1.0414445877075196,
"reward_std": 0.14395586848258973,
"rewards/accuracy_reward": 0.6592013955116272,
"rewards/brier_reward": 0.7887712478637695,
"rewards/confidence_uniqueness_reward": 0.9302976727485657,
"rewards/format_reward": 0.9826388955116272,
"rewards/frontier_aurc_reward": -0.001909919991157949,
"rewards/frontier_coverage_1": 0.04098189903888851,
"rewards/frontier_coverage_10": 0.04098189903888851,
"rewards/frontier_coverage_15": 0.04098189903888851,
"rewards/frontier_coverage_20": 0.04098189903888851,
"rewards/frontier_coverage_25": 0.04001305103302002,
"rewards/frontier_coverage_5": 0.04098189903888851,
"rewards/frontier_ece_reward": 0.010327290836721658,
"signal/accuracy_reward/centered_abs_mean": 0.1720269113779068,
"signal/accuracy_reward/group_std_mean": 0.22580362856388092,
"signal/accuracy_reward/group_zero_std_frac": 0.3666666686534882,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0860134556889534,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0860134556889534,
"signal/advantage_abs_mean": 0.10680273771286011,
"signal/advantage_pre_scale_abs_mean": 0.10680273771286011,
"signal/advantage_pre_scale_std": 0.17836588323116304,
"signal/advantage_std": 0.17836588323116304,
"signal/brier_reward/centered_abs_mean": 0.1629619389772415,
"signal/brier_reward/group_std_mean": 0.21055279076099395,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02037024237215519,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02037024237215519,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.045919667929410934,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06701685413718224,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005739958491176367,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005739958491176367,
"signal/format_reward/centered_abs_mean": 0.026041666232049464,
"signal/format_reward/group_std_mean": 0.043287623673677444,
"signal/format_reward/group_zero_std_frac": 0.8361111283302307,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013020833116024732,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013020833116024732,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001999038900248706,
"signal/frontier_aurc_reward/group_std_mean": 0.003307389048859477,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5782793565886097e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5782793565886097e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15618787109851837,
"signal/frontier_coverage_1/group_std_mean": 0.21958717703819275,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002795762661844492,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002795762661844492,
"signal/frontier_coverage_10/centered_abs_mean": 0.15618787109851837,
"signal/frontier_coverage_10/group_std_mean": 0.21958717703819275,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002795762661844492,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002795762661844492,
"signal/frontier_coverage_15/centered_abs_mean": 0.15618787109851837,
"signal/frontier_coverage_15/group_std_mean": 0.21958717703819275,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002795762661844492,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002795762661844492,
"signal/frontier_coverage_20/centered_abs_mean": 0.15618787109851837,
"signal/frontier_coverage_20/group_std_mean": 0.21958717703819275,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002795762661844492,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002795762661844492,
"signal/frontier_coverage_25/centered_abs_mean": 0.07262331247329712,
"signal/frontier_coverage_25/group_std_mean": 0.10001310557127,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001299957255832851,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001299957255832851,
"signal/frontier_coverage_5/centered_abs_mean": 0.15618787109851837,
"signal/frontier_coverage_5/group_std_mean": 0.21958717703819275,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002795762661844492,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002795762661844492,
"signal/frontier_ece_reward/centered_abs_mean": 0.01741938292980194,
"signal/frontier_ece_reward/group_std_mean": 0.022634774819016455,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021774228662252426,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021774228662252426,
"step": 130
},
{
"calibration/aurc": 0.23023647946494208,
"calibration/batch_distribution_entropy": 0.7881558617702,
"calibration/buffer_distribution_entropy": 0.9147557758253775,
"calibration/confidence_entropy": 0.36381260329450316,
"calibration/coverage@0%": 0.009973753280839895,
"calibration/coverage@1%": 0.009973753280839895,
"calibration/coverage@10%": 0.1994750656167979,
"calibration/coverage@15%": 0.34993662280701754,
"calibration/coverage@20%": 0.4895458333333333,
"calibration/coverage@25%": 0.6612416666666667,
"calibration/coverage@30%": 0.7436381578947369,
"calibration/coverage@5%": 0.1853018372703412,
"calibration/ece": 0.16311325265378912,
"calibration/mean_confidence": 0.7145244028217494,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011979166666666674,
"completions/max_length": 3853.2,
"completions/max_terminated_length": 3853.2,
"completions/mean_length": 1161.9842041015625,
"completions/mean_terminated_length": 1176.113623046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 273.6,
"epoch": 0.32399595005062437,
"grad_norm": 0.000337834469974041,
"learning_rate": 2.1987951807228917e-06,
"loss": -0.0106,
"num_tokens": 406622717.0,
"reward": 1.0532086849212647,
"reward_std": 0.1362074300646782,
"rewards/accuracy_reward": 0.6723958373069763,
"rewards/brier_reward": 0.7999706029891968,
"rewards/confidence_uniqueness_reward": 0.9347299575805664,
"rewards/format_reward": 0.9876736044883728,
"rewards/frontier_aurc_reward": -0.001792767085134983,
"rewards/frontier_coverage_1": 0.04573216512799263,
"rewards/frontier_coverage_10": 0.04573216512799263,
"rewards/frontier_coverage_15": 0.04573216512799263,
"rewards/frontier_coverage_20": 0.04573216512799263,
"rewards/frontier_coverage_25": 0.04576835259795189,
"rewards/frontier_coverage_5": 0.04573216512799263,
"rewards/frontier_ece_reward": 0.011649912409484386,
"signal/accuracy_reward/centered_abs_mean": 0.1695746511220932,
"signal/accuracy_reward/group_std_mean": 0.2238933861255646,
"signal/accuracy_reward/group_zero_std_frac": 0.3611111044883728,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0847873255610466,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0847873255610466,
"signal/advantage_abs_mean": 0.09791278541088104,
"signal/advantage_pre_scale_abs_mean": 0.09791278541088104,
"signal/advantage_pre_scale_std": 0.16925244629383088,
"signal/advantage_std": 0.16925244629383088,
"signal/brier_reward/centered_abs_mean": 0.1602795511484146,
"signal/brier_reward/group_std_mean": 0.2083508402109146,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020034943893551826,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020034943893551826,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04163586869835854,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06607603505253792,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005204483587294817,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005204483587294817,
"signal/format_reward/centered_abs_mean": 0.02120225690305233,
"signal/format_reward/group_std_mean": 0.04186696708202362,
"signal/format_reward/group_zero_std_frac": 0.819444453716278,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010601128451526165,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010601128451526165,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001877031335607171,
"signal/frontier_aurc_reward/group_std_mean": 0.002974188607186079,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.359886177349836e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.359886177349836e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1724255710840225,
"signal/frontier_coverage_1/group_std_mean": 0.2394712746143341,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003086417680606246,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003086417680606246,
"signal/frontier_coverage_10/centered_abs_mean": 0.1724255710840225,
"signal/frontier_coverage_10/group_std_mean": 0.2394712746143341,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003086417680606246,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003086417680606246,
"signal/frontier_coverage_15/centered_abs_mean": 0.1724255710840225,
"signal/frontier_coverage_15/group_std_mean": 0.2394712746143341,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003086417680606246,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003086417680606246,
"signal/frontier_coverage_20/centered_abs_mean": 0.1724255710840225,
"signal/frontier_coverage_20/group_std_mean": 0.2394712746143341,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003086417680606246,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003086417680606246,
"signal/frontier_coverage_25/centered_abs_mean": 0.07950729578733444,
"signal/frontier_coverage_25/group_std_mean": 0.10724284201860428,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001423180545680225,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001423180545680225,
"signal/frontier_coverage_5/centered_abs_mean": 0.1724255710840225,
"signal/frontier_coverage_5/group_std_mean": 0.2394712746143341,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003086417680606246,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003086417680606246,
"signal/frontier_ece_reward/centered_abs_mean": 0.01801157519221306,
"signal/frontier_ece_reward/group_std_mean": 0.023027915880084036,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022514468990266325,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022514468990266325,
"step": 135
},
{
"calibration/aurc": 0.1906562783077283,
"calibration/batch_distribution_entropy": 0.8116671180904721,
"calibration/buffer_distribution_entropy": 0.9134489331110502,
"calibration/confidence_entropy": 0.3747389081244313,
"calibration/coverage@0%": 0.0015665796344647518,
"calibration/coverage@1%": 0.0015665796344647518,
"calibration/coverage@10%": 0.14696577713433928,
"calibration/coverage@15%": 0.3754393639553163,
"calibration/coverage@20%": 0.5833659052356869,
"calibration/coverage@25%": 0.7757019471645958,
"calibration/coverage@30%": 0.9518546877147177,
"calibration/coverage@5%": 0.08167410651618517,
"calibration/ece": 0.14092391131055967,
"calibration/mean_confidence": 0.7090212503403477,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010156249999999978,
"completions/max_length": 3922.8,
"completions/max_terminated_length": 3922.8,
"completions/mean_length": 1160.94765625,
"completions/mean_terminated_length": 1172.8401611328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 344.0,
"epoch": 0.33599580005249935,
"grad_norm": 0.00027862933347932994,
"learning_rate": 2.0481927710843377e-06,
"loss": -0.0091,
"num_tokens": 423101058.0,
"reward": 1.0563385009765625,
"reward_std": 0.12681164890527724,
"rewards/accuracy_reward": 0.6782986044883728,
"rewards/brier_reward": 0.7978750586509704,
"rewards/confidence_uniqueness_reward": 0.9353888034820557,
"rewards/format_reward": 0.9897569417953491,
"rewards/frontier_aurc_reward": -0.001826054509729147,
"rewards/frontier_coverage_1": 0.0388026436092332,
"rewards/frontier_coverage_10": 0.0388026436092332,
"rewards/frontier_coverage_15": 0.0388026436092332,
"rewards/frontier_coverage_20": 0.0373392676236108,
"rewards/frontier_coverage_25": 0.04751555323600769,
"rewards/frontier_coverage_5": 0.0388026436092332,
"rewards/frontier_ece_reward": 0.011105910316109658,
"signal/accuracy_reward/centered_abs_mean": 0.15526258647441865,
"signal/accuracy_reward/group_std_mean": 0.2096972107887268,
"signal/accuracy_reward/group_zero_std_frac": 0.3833333313465118,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07763129323720933,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07763129323720933,
"signal/advantage_abs_mean": 0.09159746617078782,
"signal/advantage_pre_scale_abs_mean": 0.09159746617078782,
"signal/advantage_pre_scale_std": 0.1599065124988556,
"signal/advantage_std": 0.1599065124988556,
"signal/brier_reward/centered_abs_mean": 0.15635252892971038,
"signal/brier_reward/group_std_mean": 0.20334820151329042,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019544066116213798,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.019544066116213798,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.039848759025335315,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05892389565706253,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004981094878166914,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004981094878166914,
"signal/format_reward/centered_abs_mean": 0.01740451380610466,
"signal/format_reward/group_std_mean": 0.03113800659775734,
"signal/format_reward/group_zero_std_frac": 0.8777777791023255,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00870225690305233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00870225690305233,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019128842512145638,
"signal/frontier_aurc_reward/group_std_mean": 0.00324101191945374,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.424062597332522e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.424062597332522e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16529696583747863,
"signal/frontier_coverage_1/group_std_mean": 0.2307574212551117,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029588155448436737,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029588155448436737,
"signal/frontier_coverage_10/centered_abs_mean": 0.16529696583747863,
"signal/frontier_coverage_10/group_std_mean": 0.2307574212551117,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029588155448436737,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029588155448436737,
"signal/frontier_coverage_15/centered_abs_mean": 0.16529696583747863,
"signal/frontier_coverage_15/group_std_mean": 0.2307574212551117,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029588155448436737,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029588155448436737,
"signal/frontier_coverage_20/centered_abs_mean": 0.1624933660030365,
"signal/frontier_coverage_20/group_std_mean": 0.22703517079353333,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00290863118134439,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00290863118134439,
"signal/frontier_coverage_25/centered_abs_mean": 0.07454123198986054,
"signal/frontier_coverage_25/group_std_mean": 0.09904106110334396,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013342880178242923,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013342880178242923,
"signal/frontier_coverage_5/centered_abs_mean": 0.16529696583747863,
"signal/frontier_coverage_5/group_std_mean": 0.2307574212551117,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029588155448436737,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029588155448436737,
"signal/frontier_ece_reward/centered_abs_mean": 0.01730274744331837,
"signal/frontier_ece_reward/group_std_mean": 0.02207809016108513,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002162843430414796,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002162843430414796,
"step": 140
},
{
"calibration/aurc": 0.18805905954891727,
"calibration/batch_distribution_entropy": 0.8317421531074294,
"calibration/buffer_distribution_entropy": 0.9134495671386187,
"calibration/confidence_entropy": 0.3853728989005753,
"calibration/coverage@0%": 0.026336837841754234,
"calibration/coverage@1%": 0.026336837841754234,
"calibration/coverage@10%": 0.310333026899385,
"calibration/coverage@15%": 0.41578143820163777,
"calibration/coverage@20%": 0.5445131776015526,
"calibration/coverage@25%": 0.7681225346528582,
"calibration/coverage@30%": 0.8655409301907436,
"calibration/coverage@5%": 0.06604470680494234,
"calibration/ece": 0.09805606586818498,
"calibration/mean_confidence": 0.6831073168113699,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008767361111111071,
"completions/max_length": 3604.8,
"completions/max_terminated_length": 3604.8,
"completions/mean_length": 1115.1513427734376,
"completions/mean_terminated_length": 1125.17373046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 308.6,
"epoch": 0.34799565005437433,
"grad_norm": 0.00028119460330344737,
"learning_rate": 1.8975903614457832e-06,
"loss": -0.0078,
"num_tokens": 439012209.0,
"reward": 1.0752248764038086,
"reward_std": 0.11887196749448777,
"rewards/accuracy_reward": 0.71015625,
"rewards/brier_reward": 0.8194115638732911,
"rewards/confidence_uniqueness_reward": 0.9335981130599975,
"rewards/format_reward": 0.9911458253860473,
"rewards/frontier_aurc_reward": -0.0018106767674908042,
"rewards/frontier_coverage_1": 0.03475271426141262,
"rewards/frontier_coverage_10": 0.03475271426141262,
"rewards/frontier_coverage_15": 0.03475271426141262,
"rewards/frontier_coverage_20": 0.03500533141195774,
"rewards/frontier_coverage_25": 0.06168616786599159,
"rewards/frontier_coverage_5": 0.03475271426141262,
"rewards/frontier_ece_reward": 0.01008757334202528,
"signal/accuracy_reward/centered_abs_mean": 0.13445637822151185,
"signal/accuracy_reward/group_std_mean": 0.18606190383434296,
"signal/accuracy_reward/group_zero_std_frac": 0.43333333134651186,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06722818911075593,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06722818911075593,
"signal/advantage_abs_mean": 0.08313022255897522,
"signal/advantage_pre_scale_abs_mean": 0.08313022255897522,
"signal/advantage_pre_scale_std": 0.15367571711540223,
"signal/advantage_std": 0.15367571711540223,
"signal/brier_reward/centered_abs_mean": 0.13741165697574614,
"signal/brier_reward/group_std_mean": 0.1829265683889389,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017176457121968268,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017176457121968268,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03806578069925308,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06106965392827988,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004758222587406635,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004758222587406635,
"signal/format_reward/centered_abs_mean": 0.015831163432449103,
"signal/format_reward/group_std_mean": 0.03331194259226322,
"signal/format_reward/group_zero_std_frac": 0.850000011920929,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007915581716224552,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007915581716224552,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020372898085042833,
"signal/frontier_aurc_reward/group_std_mean": 0.0033891588915139436,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.646748591563664e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.646748591563664e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12970810532569885,
"signal/frontier_coverage_1/group_std_mean": 0.18752795159816743,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002321774885058403,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002321774885058403,
"signal/frontier_coverage_10/centered_abs_mean": 0.12970810532569885,
"signal/frontier_coverage_10/group_std_mean": 0.18752795159816743,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002321774885058403,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002321774885058403,
"signal/frontier_coverage_15/centered_abs_mean": 0.12970810532569885,
"signal/frontier_coverage_15/group_std_mean": 0.18752795159816743,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002321774885058403,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002321774885058403,
"signal/frontier_coverage_20/centered_abs_mean": 0.10424077808856964,
"signal/frontier_coverage_20/group_std_mean": 0.15195895731449127,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018659099237993359,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018659099237993359,
"signal/frontier_coverage_25/centered_abs_mean": 0.06490491330623627,
"signal/frontier_coverage_25/group_std_mean": 0.08455234318971634,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011617979034781456,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011617979034781456,
"signal/frontier_coverage_5/centered_abs_mean": 0.12970810532569885,
"signal/frontier_coverage_5/group_std_mean": 0.18752795159816743,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002321774885058403,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002321774885058403,
"signal/frontier_ece_reward/centered_abs_mean": 0.014038374833762645,
"signal/frontier_ece_reward/group_std_mean": 0.01795310601592064,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017547968542203306,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017547968542203306,
"step": 145
},
{
"calibration/aurc": 0.16444658049930577,
"calibration/batch_distribution_entropy": 0.8554792996980854,
"calibration/buffer_distribution_entropy": 0.9121123999463061,
"calibration/confidence_entropy": 0.4054611707906853,
"calibration/coverage@0%": 0.10798269230769231,
"calibration/coverage@1%": 0.12177579575596817,
"calibration/coverage@10%": 0.43126122905874886,
"calibration/coverage@15%": 0.5021995007429834,
"calibration/coverage@20%": 0.6263020287958115,
"calibration/coverage@25%": 0.6916137652705061,
"calibration/coverage@30%": 0.7823606457242582,
"calibration/coverage@5%": 0.30052938770999116,
"calibration/ece": 0.14277405261871456,
"calibration/mean_confidence": 0.6701547841980939,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009461805555555558,
"completions/max_length": 3815.6,
"completions/max_terminated_length": 3815.6,
"completions/mean_length": 1194.10302734375,
"completions/mean_terminated_length": 1205.4841552734374,
"completions/min_length": 0.0,
"completions/min_terminated_length": 301.4,
"epoch": 0.3599955000562493,
"grad_norm": 0.00032394277513958514,
"learning_rate": 1.7469879518072292e-06,
"loss": -0.0072,
"num_tokens": 455878612.0,
"reward": 1.0641360998153686,
"reward_std": 0.12625986337661743,
"rewards/accuracy_reward": 0.688368046283722,
"rewards/brier_reward": 0.8141419291496277,
"rewards/confidence_uniqueness_reward": 0.9379014372825623,
"rewards/format_reward": 0.9903645753860474,
"rewards/frontier_aurc_reward": -0.0015774117084220052,
"rewards/frontier_coverage_1": 0.040133790113031864,
"rewards/frontier_coverage_10": 0.040133790113031864,
"rewards/frontier_coverage_15": 0.03999885078519583,
"rewards/frontier_coverage_20": 0.037344107404351234,
"rewards/frontier_coverage_25": 0.06747718080878258,
"rewards/frontier_coverage_5": 0.040133790113031864,
"rewards/frontier_ece_reward": 0.008361095190048217,
"signal/accuracy_reward/centered_abs_mean": 0.1630425363779068,
"signal/accuracy_reward/group_std_mean": 0.2126096099615097,
"signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0815212681889534,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0815212681889534,
"signal/advantage_abs_mean": 0.09208865314722062,
"signal/advantage_pre_scale_abs_mean": 0.09208865314722062,
"signal/advantage_pre_scale_std": 0.1610693395137787,
"signal/advantage_std": 0.1610693395137787,
"signal/brier_reward/centered_abs_mean": 0.14592998921871186,
"signal/brier_reward/group_std_mean": 0.19062794744968414,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018241248652338983,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.018241248652338983,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03703403770923615,
"signal/confidence_uniqueness_reward/group_std_mean": 0.056619017571210864,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0046292547136545185,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0046292547136545185,
"signal/format_reward/centered_abs_mean": 0.015988498367369174,
"signal/format_reward/group_std_mean": 0.030791251361370085,
"signal/format_reward/group_zero_std_frac": 0.8722222328186036,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007994249183684587,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007994249183684587,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018028806429356337,
"signal/frontier_aurc_reward/group_std_mean": 0.0029539034236222505,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.227156230423134e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.227156230423134e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15507982075214385,
"signal/frontier_coverage_1/group_std_mean": 0.2211749255657196,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027759287506341932,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027759287506341932,
"signal/frontier_coverage_10/centered_abs_mean": 0.15507982075214385,
"signal/frontier_coverage_10/group_std_mean": 0.2211749255657196,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027759287506341932,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027759287506341932,
"signal/frontier_coverage_15/centered_abs_mean": 0.15409548580646515,
"signal/frontier_coverage_15/group_std_mean": 0.21988695561885835,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002758309058845043,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002758309058845043,
"signal/frontier_coverage_20/centered_abs_mean": 0.10383160263299943,
"signal/frontier_coverage_20/group_std_mean": 0.15046164393424988,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018585855141282082,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018585855141282082,
"signal/frontier_coverage_25/centered_abs_mean": 0.06960556581616402,
"signal/frontier_coverage_25/group_std_mean": 0.08995560258626938,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012459396151825787,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012459396151825787,
"signal/frontier_coverage_5/centered_abs_mean": 0.15507982075214385,
"signal/frontier_coverage_5/group_std_mean": 0.2211749255657196,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027759287506341932,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027759287506341932,
"signal/frontier_ece_reward/centered_abs_mean": 0.013568481430411339,
"signal/frontier_ece_reward/group_std_mean": 0.017983463406562806,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016960601788014174,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016960601788014174,
"step": 150
},
{
"epoch": 0.3599955000562493,
"eval_calibration/aurc": 0.12589744753156493,
"eval_calibration/batch_distribution_entropy": 0.792195222040961,
"eval_calibration/buffer_distribution_entropy": 0.9098214718589893,
"eval_calibration/confidence_entropy": 0.42053250993043306,
"eval_calibration/coverage@0%": 0.2567204301075269,
"eval_calibration/coverage@1%": 0.2567204301075269,
"eval_calibration/coverage@10%": 0.43531586021505375,
"eval_calibration/coverage@15%": 0.5924059139784946,
"eval_calibration/coverage@20%": 0.8136760752688174,
"eval_calibration/coverage@25%": 0.9420362903225806,
"eval_calibration/coverage@30%": 0.9895833333333334,
"eval_calibration/coverage@5%": 0.29838709677419356,
"eval_calibration/ece": 0.14721396114449362,
"eval_calibration/mean_confidence": 0.6862037135866892,
"eval_completions/clipped_ratio": 0.006076388888888895,
"eval_completions/max_length": 2771.8333333333335,
"eval_completions/max_terminated_length": 2771.8333333333335,
"eval_completions/mean_length": 1130.8981323242188,
"eval_completions/mean_terminated_length": 1137.8710530598958,
"eval_completions/min_length": 159.83333333333334,
"eval_completions/min_terminated_length": 384.8333333333333,
"eval_loss": 0.0,
"eval_num_tokens": 455878612.0,
"eval_reward": 1.06011829773585,
"eval_reward_std": 0.245634155968825,
"eval_rewards/accuracy_reward": 0.6901041666666666,
"eval_rewards/brier_reward": 0.8112742304801941,
"eval_rewards/confidence_uniqueness_reward": 0.887604296207428,
"eval_rewards/format_reward": 0.9939236144224802,
"eval_rewards/frontier_aurc_reward": -0.001700426151122277,
"eval_rewards/frontier_coverage_1": 0.0397941037081182,
"eval_rewards/frontier_coverage_10": 0.0397941037081182,
"eval_rewards/frontier_coverage_15": 0.039953491340080895,
"eval_rewards/frontier_coverage_20": 0.0386975952424109,
"eval_rewards/frontier_coverage_25": 0.0703319435318311,
"eval_rewards/frontier_coverage_5": 0.0397941037081182,
"eval_rewards/frontier_ece_reward": 0.007770234486088157,
"eval_runtime": 194.5895,
"eval_samples_per_second": 5.139,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4119466145833333,
"eval_signal/accuracy_reward/group_std_mean": 0.45975885291894275,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20597330729166666,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20597330729166666,
"eval_signal/advantage_abs_mean": 0.21174462139606476,
"eval_signal/advantage_pre_scale_abs_mean": 0.21174462139606476,
"eval_signal/advantage_pre_scale_std": 0.24418220420678458,
"eval_signal/advantage_std": 0.24418220420678458,
"eval_signal/brier_reward/centered_abs_mean": 0.21792598068714142,
"eval_signal/brier_reward/group_std_mean": 0.2779506991306941,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027240747585892677,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.027240747585892677,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05039315981169542,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07548397406935692,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006299144976461927,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006299144976461927,
"eval_signal/format_reward/centered_abs_mean": 0.01177300326526165,
"eval_signal/format_reward/group_std_mean": 0.034373246133327484,
"eval_signal/format_reward/group_zero_std_frac": 0.8055555721124014,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.005886501632630825,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.005886501632630825,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002665710848911355,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004803995058561365,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7716222676778365e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7716222676778365e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.20755265404780707,
"eval_signal/frontier_coverage_1/group_std_mean": 0.33392194906870526,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037151926274721823,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037151926274721823,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.20755265404780707,
"eval_signal/frontier_coverage_10/group_std_mean": 0.33392194906870526,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037151926274721823,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037151926274721823,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.20377135028441748,
"eval_signal/frontier_coverage_15/group_std_mean": 0.3286268611749013,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003647507051937282,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003647507051937282,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.12508864452441534,
"eval_signal/frontier_coverage_20/group_std_mean": 0.2087546760837237,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002239086684615662,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002239086684615662,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.09835883850852649,
"eval_signal/frontier_coverage_25/group_std_mean": 0.12412550052007039,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017606231267563999,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017606231267563999,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.20755265404780707,
"eval_signal/frontier_coverage_5/group_std_mean": 0.33392194906870526,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037151926274721823,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037151926274721823,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.01478797197341919,
"eval_signal/frontier_ece_reward/group_std_mean": 0.02143588351706664,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018484964966773987,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018484964966773987,
"eval_steps_per_second": 0.031,
"step": 150
},
{
"calibration/aurc": 0.1592934062862865,
"calibration/batch_distribution_entropy": 0.8200635510914287,
"calibration/buffer_distribution_entropy": 0.9056196886811655,
"calibration/confidence_entropy": 0.4273629669999046,
"calibration/coverage@0%": 0.05090042510789072,
"calibration/coverage@1%": 0.07300568826578545,
"calibration/coverage@10%": 0.4090462757088025,
"calibration/coverage@15%": 0.49287078530957606,
"calibration/coverage@20%": 0.7218659605131165,
"calibration/coverage@25%": 0.7721199035330008,
"calibration/coverage@30%": 0.8353387467191601,
"calibration/coverage@5%": 0.25320305668683807,
"calibration/ece": 0.11551180587161887,
"calibration/mean_confidence": 0.7259954579627551,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00746527777777779,
"completions/max_length": 3773.0,
"completions/max_terminated_length": 3773.0,
"completions/mean_length": 1111.4385498046875,
"completions/mean_terminated_length": 1119.8486083984376,
"completions/min_length": 0.0,
"completions/min_terminated_length": 335.0,
"epoch": 0.3719953500581243,
"grad_norm": 0.0003368077159393579,
"learning_rate": 1.5963855421686747e-06,
"loss": -0.0063,
"num_tokens": 471790096.0,
"reward": 1.0929233312606812,
"reward_std": 0.12424634695053101,
"rewards/accuracy_reward": 0.7425347208976746,
"rewards/brier_reward": 0.8317407846450806,
"rewards/confidence_uniqueness_reward": 0.9393562436103821,
"rewards/format_reward": 0.9925347089767456,
"rewards/frontier_aurc_reward": -0.0012545568635687232,
"rewards/frontier_coverage_1": 0.017057520151138306,
"rewards/frontier_coverage_10": 0.017057520151138306,
"rewards/frontier_coverage_15": 0.017654052283614875,
"rewards/frontier_coverage_20": 0.026030075177550314,
"rewards/frontier_coverage_25": 0.08343757688999176,
"rewards/frontier_coverage_5": 0.017057520151138306,
"rewards/frontier_ece_reward": 0.0066596436314284805,
"signal/accuracy_reward/centered_abs_mean": 0.15950520634651183,
"signal/accuracy_reward/group_std_mean": 0.21293214857578277,
"signal/accuracy_reward/group_zero_std_frac": 0.3777777820825577,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07975260317325591,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07975260317325591,
"signal/advantage_abs_mean": 0.08897002339363098,
"signal/advantage_pre_scale_abs_mean": 0.08897002339363098,
"signal/advantage_pre_scale_std": 0.1588042050600052,
"signal/advantage_std": 0.1588042050600052,
"signal/brier_reward/centered_abs_mean": 0.1306125193834305,
"signal/brier_reward/group_std_mean": 0.17341192066669464,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01632656492292881,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01632656492292881,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.034348542988300326,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05231625810265541,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004293567873537541,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004293567873537541,
"signal/format_reward/centered_abs_mean": 0.013487413339316846,
"signal/format_reward/group_std_mean": 0.027320950850844385,
"signal/format_reward/group_zero_std_frac": 0.8833333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006743706669658423,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006743706669658423,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001531174755655229,
"signal/frontier_aurc_reward/group_std_mean": 0.002751293499022722,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7408025925979018e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7408025925979018e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13883511126041412,
"signal/frontier_coverage_1/group_std_mean": 0.1974550575017929,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024851484689861537,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024851484689861537,
"signal/frontier_coverage_10/centered_abs_mean": 0.13883511126041412,
"signal/frontier_coverage_10/group_std_mean": 0.1974550575017929,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024851484689861537,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024851484689861537,
"signal/frontier_coverage_15/centered_abs_mean": 0.13508277088403703,
"signal/frontier_coverage_15/group_std_mean": 0.19253535866737365,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024179814849048854,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024179814849048854,
"signal/frontier_coverage_20/centered_abs_mean": 0.07531605064868926,
"signal/frontier_coverage_20/group_std_mean": 0.1094200387597084,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013481572968885303,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013481572968885303,
"signal/frontier_coverage_25/centered_abs_mean": 0.06770254969596863,
"signal/frontier_coverage_25/group_std_mean": 0.08678248971700668,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001211875630542636,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001211875630542636,
"signal/frontier_coverage_5/centered_abs_mean": 0.13883511126041412,
"signal/frontier_coverage_5/group_std_mean": 0.1974550575017929,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024851484689861537,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024851484689861537,
"signal/frontier_ece_reward/centered_abs_mean": 0.01135763879865408,
"signal/frontier_ece_reward/group_std_mean": 0.014954530447721482,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00141970484983176,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00141970484983176,
"step": 155
},
{
"calibration/aurc": 0.16224591980290784,
"calibration/batch_distribution_entropy": 0.7755681714330434,
"calibration/buffer_distribution_entropy": 0.8944977396614957,
"calibration/confidence_entropy": 0.40641718225845425,
"calibration/coverage@0%": 0.021498948757059815,
"calibration/coverage@1%": 0.021498948757059815,
"calibration/coverage@10%": 0.4811618811064847,
"calibration/coverage@15%": 0.7439212993500158,
"calibration/coverage@20%": 0.7811449615918429,
"calibration/coverage@25%": 0.7979112558574157,
"calibration/coverage@30%": 0.8020997375328083,
"calibration/coverage@5%": 0.1755658160530981,
"calibration/ece": 0.10178569662618395,
"calibration/mean_confidence": 0.7464084729023905,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012673611111111139,
"completions/max_length": 3875.6,
"completions/max_terminated_length": 3875.6,
"completions/mean_length": 1103.394287109375,
"completions/mean_terminated_length": 1117.539599609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 307.0,
"epoch": 0.38399520005999926,
"grad_norm": 0.00029608793556690216,
"learning_rate": 1.4457831325301204e-06,
"loss": -0.0106,
"num_tokens": 487588494.0,
"reward": 1.053929877281189,
"reward_std": 0.12981161773204802,
"rewards/accuracy_reward": 0.6743055582046509,
"rewards/brier_reward": 0.8059103727340698,
"rewards/confidence_uniqueness_reward": 0.9313126802444458,
"rewards/format_reward": 0.9873263835906982,
"rewards/frontier_aurc_reward": -0.0018599932780489325,
"rewards/frontier_coverage_1": 0.04324149824678898,
"rewards/frontier_coverage_10": 0.04324149824678898,
"rewards/frontier_coverage_15": 0.042741596698760986,
"rewards/frontier_coverage_20": 0.035604484006762506,
"rewards/frontier_coverage_25": 0.07923954874277114,
"rewards/frontier_coverage_5": 0.04324149824678898,
"rewards/frontier_ece_reward": 0.006811666022986174,
"signal/accuracy_reward/centered_abs_mean": 0.1576822891831398,
"signal/accuracy_reward/group_std_mean": 0.2048851728439331,
"signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0788411445915699,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0788411445915699,
"signal/advantage_abs_mean": 0.09569884538650512,
"signal/advantage_pre_scale_abs_mean": 0.09569884538650512,
"signal/advantage_pre_scale_std": 0.1690044015645981,
"signal/advantage_std": 0.1690044015645981,
"signal/brier_reward/centered_abs_mean": 0.14289563298225402,
"signal/brier_reward/group_std_mean": 0.18329527378082275,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017861954122781753,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017861954122781753,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.042262401431798935,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06386898383498192,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005282800178974867,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005282800178974867,
"signal/format_reward/centered_abs_mean": 0.02110460065305233,
"signal/format_reward/group_std_mean": 0.038827139884233475,
"signal/format_reward/group_zero_std_frac": 0.8416666626930237,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010552300326526164,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010552300326526164,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020189554430544376,
"signal/frontier_aurc_reward/group_std_mean": 0.003409457951784134,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.613930239225738e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.613930239225738e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14031126350164413,
"signal/frontier_coverage_1/group_std_mean": 0.19471052289009094,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002511571627110243,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002511571627110243,
"signal/frontier_coverage_10/centered_abs_mean": 0.14031126350164413,
"signal/frontier_coverage_10/group_std_mean": 0.19471052289009094,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002511571627110243,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002511571627110243,
"signal/frontier_coverage_15/centered_abs_mean": 0.13545251190662383,
"signal/frontier_coverage_15/group_std_mean": 0.1884155750274658,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024245998822152613,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024245998822152613,
"signal/frontier_coverage_20/centered_abs_mean": 0.07455982491374016,
"signal/frontier_coverage_20/group_std_mean": 0.10475812703371049,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013346209190785885,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013346209190785885,
"signal/frontier_coverage_25/centered_abs_mean": 0.07323736399412155,
"signal/frontier_coverage_25/group_std_mean": 0.09230298697948455,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013109487714245915,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013109487714245915,
"signal/frontier_coverage_5/centered_abs_mean": 0.14031126350164413,
"signal/frontier_coverage_5/group_std_mean": 0.19471052289009094,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002511571627110243,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002511571627110243,
"signal/frontier_ece_reward/centered_abs_mean": 0.010905621573328972,
"signal/frontier_ece_reward/group_std_mean": 0.01399848610162735,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013632026966661215,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013632026966661215,
"step": 160
},
{
"calibration/aurc": 0.18802935979054125,
"calibration/batch_distribution_entropy": 0.8066304456688098,
"calibration/buffer_distribution_entropy": 0.8820289058028152,
"calibration/confidence_entropy": 0.39912895218336397,
"calibration/coverage@0%": 0.01574846717219552,
"calibration/coverage@1%": 0.01574846717219552,
"calibration/coverage@10%": 0.23960081124989277,
"calibration/coverage@15%": 0.5227776790286016,
"calibration/coverage@20%": 0.6225712078939487,
"calibration/coverage@25%": 0.6983694035928395,
"calibration/coverage@30%": 0.8085135795463503,
"calibration/coverage@5%": 0.04206425664587973,
"calibration/ece": 0.1298895751168595,
"calibration/mean_confidence": 0.7157046755256667,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012847222222222232,
"completions/max_length": 3910.0,
"completions/max_terminated_length": 3910.0,
"completions/mean_length": 1123.602978515625,
"completions/mean_terminated_length": 1138.41201171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 332.8,
"epoch": 0.39599505006187424,
"grad_norm": 0.00032429303973913193,
"learning_rate": 1.2951807228915664e-06,
"loss": -0.0107,
"num_tokens": 503671472.0,
"reward": 1.053611421585083,
"reward_std": 0.12947496324777602,
"rewards/accuracy_reward": 0.6701388955116272,
"rewards/brier_reward": 0.8113029956817627,
"rewards/confidence_uniqueness_reward": 0.9313131213188172,
"rewards/format_reward": 0.986718761920929,
"rewards/frontier_aurc_reward": -0.0019012225093320012,
"rewards/frontier_coverage_1": 0.0559473067522049,
"rewards/frontier_coverage_10": 0.0559473067522049,
"rewards/frontier_coverage_15": 0.05543726235628128,
"rewards/frontier_coverage_20": 0.044922591745853425,
"rewards/frontier_coverage_25": 0.0935636967420578,
"rewards/frontier_coverage_5": 0.0559473067522049,
"rewards/frontier_ece_reward": 0.007312025129795075,
"signal/accuracy_reward/centered_abs_mean": 0.14447699785232543,
"signal/accuracy_reward/group_std_mean": 0.19804940819740297,
"signal/accuracy_reward/group_zero_std_frac": 0.4055555522441864,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07223849892616271,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07223849892616271,
"signal/advantage_abs_mean": 0.09204500317573547,
"signal/advantage_pre_scale_abs_mean": 0.09204500317573547,
"signal/advantage_pre_scale_std": 0.16653842926025392,
"signal/advantage_std": 0.16653842926025392,
"signal/brier_reward/centered_abs_mean": 0.14590578377246857,
"signal/brier_reward/group_std_mean": 0.1900397479534149,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01823822297155857,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01823822297155857,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04235764890909195,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06415605992078781,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005294706113636494,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005294706113636494,
"signal/format_reward/centered_abs_mean": 0.022314453125,
"signal/format_reward/group_std_mean": 0.04004841782152653,
"signal/format_reward/group_zero_std_frac": 0.8416666865348816,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0111572265625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0111572265625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023163022473454476,
"signal/frontier_aurc_reward/group_std_mean": 0.00396724371239543,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1461809087195436e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1461809087195436e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14388979077339173,
"signal/frontier_coverage_1/group_std_mean": 0.2028069317340851,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002575627202168107,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002575627202168107,
"signal/frontier_coverage_10/centered_abs_mean": 0.14388979077339173,
"signal/frontier_coverage_10/group_std_mean": 0.2028069317340851,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002575627202168107,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002575627202168107,
"signal/frontier_coverage_15/centered_abs_mean": 0.13867741376161574,
"signal/frontier_coverage_15/group_std_mean": 0.19579787254333497,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024823257233947517,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024823257233947517,
"signal/frontier_coverage_20/centered_abs_mean": 0.07798126637935639,
"signal/frontier_coverage_20/group_std_mean": 0.10919245183467866,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013958646217361093,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013958646217361093,
"signal/frontier_coverage_25/centered_abs_mean": 0.07589124590158462,
"signal/frontier_coverage_25/group_std_mean": 0.09610055834054947,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013584532076492905,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013584532076492905,
"signal/frontier_coverage_5/centered_abs_mean": 0.14388979077339173,
"signal/frontier_coverage_5/group_std_mean": 0.2028069317340851,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002575627202168107,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002575627202168107,
"signal/frontier_ece_reward/centered_abs_mean": 0.010885684378445148,
"signal/frontier_ece_reward/group_std_mean": 0.013944818638265133,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013607105473056435,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013607105473056435,
"step": 165
},
{
"calibration/aurc": 0.14454796269342324,
"calibration/batch_distribution_entropy": 0.7588273654201286,
"calibration/buffer_distribution_entropy": 0.8704741506910846,
"calibration/confidence_entropy": 0.36874330606179995,
"calibration/coverage@0%": 0.009399541884816754,
"calibration/coverage@1%": 0.009399541884816754,
"calibration/coverage@10%": 0.32980529835742545,
"calibration/coverage@15%": 0.6194528392530796,
"calibration/coverage@20%": 0.8288237113841952,
"calibration/coverage@25%": 0.919738063236047,
"calibration/coverage@30%": 0.9756613756613757,
"calibration/coverage@5%": 0.0987727704833249,
"calibration/ece": 0.09647885897278856,
"calibration/mean_confidence": 0.7515438098948212,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011111111111111117,
"completions/max_length": 3841.0,
"completions/max_terminated_length": 3841.0,
"completions/mean_length": 1090.309814453125,
"completions/mean_terminated_length": 1102.6180908203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 284.6,
"epoch": 0.4079949000637492,
"grad_norm": 0.0003373819636180997,
"learning_rate": 1.1445783132530121e-06,
"loss": -0.0087,
"num_tokens": 519321025.0,
"reward": 1.0796474456787108,
"reward_std": 0.12459334284067154,
"rewards/accuracy_reward": 0.7208333492279053,
"rewards/brier_reward": 0.8231194734573364,
"rewards/confidence_uniqueness_reward": 0.9316921472549439,
"rewards/format_reward": 0.9887152791023255,
"rewards/frontier_aurc_reward": -0.001801234926097095,
"rewards/frontier_coverage_1": 0.029103067331016065,
"rewards/frontier_coverage_10": 0.029103067331016065,
"rewards/frontier_coverage_15": 0.02983991215005517,
"rewards/frontier_coverage_20": 0.036224594712257384,
"rewards/frontier_coverage_25": 0.11188144534826279,
"rewards/frontier_coverage_5": 0.029103067331016065,
"rewards/frontier_ece_reward": 0.006446403171867132,
"signal/accuracy_reward/centered_abs_mean": 0.14637586772441863,
"signal/accuracy_reward/group_std_mean": 0.1990185409784317,
"signal/accuracy_reward/group_zero_std_frac": 0.41666666865348817,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07318793386220931,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07318793386220931,
"signal/advantage_abs_mean": 0.08974390029907227,
"signal/advantage_pre_scale_abs_mean": 0.08974390029907227,
"signal/advantage_pre_scale_std": 0.16255992650985718,
"signal/advantage_std": 0.16255992650985718,
"signal/brier_reward/centered_abs_mean": 0.13866532742977142,
"signal/brier_reward/group_std_mean": 0.18006704449653627,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017333165928721427,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017333165928721427,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.040852177888154984,
"signal/confidence_uniqueness_reward/group_std_mean": 0.059870512783527376,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005106522236019373,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005106522236019373,
"signal/format_reward/centered_abs_mean": 0.018869357742369176,
"signal/format_reward/group_std_mean": 0.033280248194932936,
"signal/format_reward/group_zero_std_frac": 0.8694444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009434678871184588,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009434678871184588,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022833514027297496,
"signal/frontier_aurc_reward/group_std_mean": 0.00370091856457293,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.087198940396775e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.087198940396775e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1318504437804222,
"signal/frontier_coverage_1/group_std_mean": 0.18823845088481903,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023601229302585127,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023601229302585127,
"signal/frontier_coverage_10/centered_abs_mean": 0.1318504437804222,
"signal/frontier_coverage_10/group_std_mean": 0.18823845088481903,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023601229302585127,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023601229302585127,
"signal/frontier_coverage_15/centered_abs_mean": 0.1259125664830208,
"signal/frontier_coverage_15/group_std_mean": 0.18023517727851868,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002253834856674075,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002253834856674075,
"signal/frontier_coverage_20/centered_abs_mean": 0.06886096596717835,
"signal/frontier_coverage_20/group_std_mean": 0.09653576016426087,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012326112482696772,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012326112482696772,
"signal/frontier_coverage_25/centered_abs_mean": 0.08277640789747238,
"signal/frontier_coverage_25/group_std_mean": 0.10427123010158539,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014816976618021727,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014816976618021727,
"signal/frontier_coverage_5/centered_abs_mean": 0.1318504437804222,
"signal/frontier_coverage_5/group_std_mean": 0.18823845088481903,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023601229302585127,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023601229302585127,
"signal/frontier_ece_reward/centered_abs_mean": 0.010013974830508232,
"signal/frontier_ece_reward/group_std_mean": 0.012912089005112648,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001251746853813529,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001251746853813529,
"step": 170
},
{
"calibration/aurc": 0.13834606680918402,
"calibration/batch_distribution_entropy": 0.8186999997361368,
"calibration/buffer_distribution_entropy": 0.8639091261459949,
"calibration/confidence_entropy": 0.3837927440740244,
"calibration/coverage@0%": 0.031790703050268966,
"calibration/coverage@1%": 0.031790703050268966,
"calibration/coverage@10%": 0.39925962188917974,
"calibration/coverage@15%": 0.6025120475540564,
"calibration/coverage@20%": 0.8262619502896491,
"calibration/coverage@25%": 0.9115204932253234,
"calibration/coverage@30%": 0.9701405779774911,
"calibration/coverage@5%": 0.1683305428786683,
"calibration/ece": 0.08496535814356126,
"calibration/mean_confidence": 0.7056412285906399,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012760416666666673,
"completions/max_length": 3912.8,
"completions/max_terminated_length": 3912.8,
"completions/mean_length": 1137.7668701171874,
"completions/mean_terminated_length": 1152.48974609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 314.2,
"epoch": 0.4199947500656242,
"grad_norm": 0.00032794420258142054,
"learning_rate": 9.93975903614458e-07,
"loss": -0.0129,
"num_tokens": 535536067.0,
"reward": 1.0705443382263184,
"reward_std": 0.13408834338188172,
"rewards/accuracy_reward": 0.7054687380790711,
"rewards/brier_reward": 0.8158458828926086,
"rewards/confidence_uniqueness_reward": 0.9295023679733276,
"rewards/format_reward": 0.9870659708976746,
"rewards/frontier_aurc_reward": -0.0016766191460192204,
"rewards/frontier_coverage_1": 0.03425406012684107,
"rewards/frontier_coverage_10": 0.03425406012684107,
"rewards/frontier_coverage_15": 0.03412420265376568,
"rewards/frontier_coverage_20": 0.039517082273960114,
"rewards/frontier_coverage_25": 0.12406121045351029,
"rewards/frontier_coverage_5": 0.03425406012684107,
"rewards/frontier_ece_reward": 0.0060807295143604275,
"signal/accuracy_reward/centered_abs_mean": 0.15820854902267456,
"signal/accuracy_reward/group_std_mean": 0.21389053761959076,
"signal/accuracy_reward/group_zero_std_frac": 0.36666667461395264,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07910427451133728,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07910427451133728,
"signal/advantage_abs_mean": 0.09520394802093506,
"signal/advantage_pre_scale_abs_mean": 0.09520394802093506,
"signal/advantage_pre_scale_std": 0.17199627161026002,
"signal/advantage_std": 0.17199627161026002,
"signal/brier_reward/centered_abs_mean": 0.14463236629962922,
"signal/brier_reward/group_std_mean": 0.19018857181072235,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018079045787453653,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.018079045787453653,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04297049716114998,
"signal/confidence_uniqueness_reward/group_std_mean": 0.066465725004673,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005371312145143747,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005371312145143747,
"signal/format_reward/centered_abs_mean": 0.022140841744840146,
"signal/format_reward/group_std_mean": 0.04171677567064762,
"signal/format_reward/group_zero_std_frac": 0.8277777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011070420872420073,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011070420872420073,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022307957988232374,
"signal/frontier_aurc_reward/group_std_mean": 0.0038668690249323845,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9931246283231304e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9931246283231304e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14078425168991088,
"signal/frontier_coverage_1/group_std_mean": 0.204370379447937,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002520037954673171,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002520037954673171,
"signal/frontier_coverage_10/centered_abs_mean": 0.14078425168991088,
"signal/frontier_coverage_10/group_std_mean": 0.204370379447937,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002520037954673171,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002520037954673171,
"signal/frontier_coverage_15/centered_abs_mean": 0.13343214988708496,
"signal/frontier_coverage_15/group_std_mean": 0.19431246519088746,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002388435346074402,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002388435346074402,
"signal/frontier_coverage_20/centered_abs_mean": 0.07233781591057778,
"signal/frontier_coverage_20/group_std_mean": 0.10254417657852173,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001294846786186099,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001294846786186099,
"signal/frontier_coverage_25/centered_abs_mean": 0.08985466808080673,
"signal/frontier_coverage_25/group_std_mean": 0.11489148437976837,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016083985101431608,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016083985101431608,
"signal/frontier_coverage_5/centered_abs_mean": 0.14078425168991088,
"signal/frontier_coverage_5/group_std_mean": 0.204370379447937,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002520037954673171,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002520037954673171,
"signal/frontier_ece_reward/centered_abs_mean": 0.010127259977161884,
"signal/frontier_ece_reward/group_std_mean": 0.013339119404554367,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012659074971452355,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012659074971452355,
"step": 175
},
{
"calibration/aurc": 0.1078750692864375,
"calibration/batch_distribution_entropy": 0.7768469934330399,
"calibration/buffer_distribution_entropy": 0.8621689599316819,
"calibration/confidence_entropy": 0.3939859688531258,
"calibration/coverage@0%": 0.06972233539436981,
"calibration/coverage@1%": 0.06972233539436981,
"calibration/coverage@10%": 0.5525677745061607,
"calibration/coverage@15%": 0.7335402018406365,
"calibration/coverage@20%": 0.8665403953716891,
"calibration/coverage@25%": 0.9559681697612732,
"calibration/coverage@30%": 0.9702917771883289,
"calibration/coverage@5%": 0.27872282790800273,
"calibration/ece": 0.08607349569510638,
"calibration/mean_confidence": 0.7374003617662448,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014149305555555537,
"completions/max_length": 3998.6,
"completions/max_terminated_length": 3998.6,
"completions/mean_length": 1096.2947998046875,
"completions/mean_terminated_length": 1111.9990234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 338.8,
"epoch": 0.4319946000674992,
"grad_norm": 0.00031256891088560224,
"learning_rate": 8.433734939759036e-07,
"loss": -0.012,
"num_tokens": 551265351.0,
"reward": 1.0728761911392213,
"reward_std": 0.12901873141527176,
"rewards/accuracy_reward": 0.7154513835906983,
"rewards/brier_reward": 0.810655677318573,
"rewards/confidence_uniqueness_reward": 0.9245447874069214,
"rewards/format_reward": 0.9855902791023254,
"rewards/frontier_aurc_reward": -0.0020415371283888815,
"rewards/frontier_coverage_1": 0.022566875419579448,
"rewards/frontier_coverage_10": 0.02261007858905941,
"rewards/frontier_coverage_15": 0.02267046067863703,
"rewards/frontier_coverage_20": 0.035761307924985886,
"rewards/frontier_coverage_25": 0.14290719628334045,
"rewards/frontier_coverage_5": 0.022566875419579448,
"rewards/frontier_ece_reward": 0.005401916056871414,
"signal/accuracy_reward/centered_abs_mean": 0.14884982407093048,
"signal/accuracy_reward/group_std_mean": 0.19917932748794556,
"signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07442491203546524,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07442491203546524,
"signal/advantage_abs_mean": 0.09345675855875016,
"signal/advantage_pre_scale_abs_mean": 0.09345675855875016,
"signal/advantage_pre_scale_std": 0.16980061829090118,
"signal/advantage_std": 0.16980061829090118,
"signal/brier_reward/centered_abs_mean": 0.13750295341014862,
"signal/brier_reward/group_std_mean": 0.17973470985889434,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017187869176268578,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017187869176268578,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.044607821851968765,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06631096750497818,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005575977731496096,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005575977731496096,
"signal/format_reward/centered_abs_mean": 0.02207031212747097,
"signal/format_reward/group_std_mean": 0.03964213021099568,
"signal/format_reward/group_zero_std_frac": 0.8388888835906982,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011035156063735485,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011035156063735485,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025864802300930023,
"signal/frontier_aurc_reward/group_std_mean": 0.0044010514859110115,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.629799441318028e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.629799441318028e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12913522273302078,
"signal/frontier_coverage_1/group_std_mean": 0.18910902738571167,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023115205112844706,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023115205112844706,
"signal/frontier_coverage_10/centered_abs_mean": 0.12895990014076233,
"signal/frontier_coverage_10/group_std_mean": 0.18887372612953185,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002308382326737046,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002308382326737046,
"signal/frontier_coverage_15/centered_abs_mean": 0.1184653490781784,
"signal/frontier_coverage_15/group_std_mean": 0.17463470101356507,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002120529650710523,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002120529650710523,
"signal/frontier_coverage_20/centered_abs_mean": 0.06513682901859283,
"signal/frontier_coverage_20/group_std_mean": 0.09168828129768372,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011659492505714298,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011659492505714298,
"signal/frontier_coverage_25/centered_abs_mean": 0.10031676590442658,
"signal/frontier_coverage_25/group_std_mean": 0.1278284102678299,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017956699943169952,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017956699943169952,
"signal/frontier_coverage_5/centered_abs_mean": 0.12913522273302078,
"signal/frontier_coverage_5/group_std_mean": 0.18910902738571167,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023115205112844706,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023115205112844706,
"signal/frontier_ece_reward/centered_abs_mean": 0.009569591842591763,
"signal/frontier_ece_reward/group_std_mean": 0.012618933990597724,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011961989803239703,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011961989803239703,
"step": 180
},
{
"calibration/aurc": 0.1905658711031758,
"calibration/batch_distribution_entropy": 0.7511575429186707,
"calibration/buffer_distribution_entropy": 0.8596426578480262,
"calibration/confidence_entropy": 0.38262879225186686,
"calibration/coverage@0%": 0.012010443864229765,
"calibration/coverage@1%": 0.012010443864229765,
"calibration/coverage@10%": 0.18892950391644908,
"calibration/coverage@15%": 0.5163960795428519,
"calibration/coverage@20%": 0.7123345333173191,
"calibration/coverage@25%": 0.7931292530992373,
"calibration/coverage@30%": 0.8088772845953003,
"calibration/coverage@5%": 0.09822729146626356,
"calibration/ece": 0.10218960656261426,
"calibration/mean_confidence": 0.7544263684590896,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012413194444444442,
"completions/max_length": 4026.4,
"completions/max_terminated_length": 4026.4,
"completions/mean_length": 1112.1637451171875,
"completions/mean_terminated_length": 1126.181298828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 306.6,
"epoch": 0.44399445006937416,
"grad_norm": 0.0003794998920056969,
"learning_rate": 6.927710843373495e-07,
"loss": -0.0089,
"num_tokens": 567167525.0,
"reward": 1.0585483074188233,
"reward_std": 0.13896091282367706,
"rewards/accuracy_reward": 0.68046875,
"rewards/brier_reward": 0.8080840945243836,
"rewards/confidence_uniqueness_reward": 0.9286048769950866,
"rewards/format_reward": 0.9873263835906982,
"rewards/frontier_aurc_reward": -0.002264450001530349,
"rewards/frontier_coverage_1": 0.04679740741848946,
"rewards/frontier_coverage_10": 0.04667307548224926,
"rewards/frontier_coverage_15": 0.045523762702941895,
"rewards/frontier_coverage_20": 0.046609895676374434,
"rewards/frontier_coverage_25": 0.15252834260463716,
"rewards/frontier_coverage_5": 0.04679740741848946,
"rewards/frontier_ece_reward": 0.005719311535358429,
"signal/accuracy_reward/centered_abs_mean": 0.16688910722732545,
"signal/accuracy_reward/group_std_mean": 0.2191845268011093,
"signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08344455361366272,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08344455361366272,
"signal/advantage_abs_mean": 0.10244468003511428,
"signal/advantage_pre_scale_abs_mean": 0.10244468003511428,
"signal/advantage_pre_scale_std": 0.17401386499404908,
"signal/advantage_std": 0.17401386499404908,
"signal/brier_reward/centered_abs_mean": 0.1530242681503296,
"signal/brier_reward/group_std_mean": 0.19397515952587127,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0191280335187912,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0191280335187912,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04111187309026718,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0640983261168003,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005138984136283398,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005138984136283398,
"signal/format_reward/centered_abs_mean": 0.02018229141831398,
"signal/format_reward/group_std_mean": 0.03944449722766876,
"signal/format_reward/group_zero_std_frac": 0.830555546283722,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01009114570915699,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01009114570915699,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002950982470065355,
"signal/frontier_aurc_reward/group_std_mean": 0.004983780579641462,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.282258280203678e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.282258280203678e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14425066113471985,
"signal/frontier_coverage_1/group_std_mean": 0.20719496309757232,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025820867624133824,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025820867624133824,
"signal/frontier_coverage_10/centered_abs_mean": 0.14380578100681304,
"signal/frontier_coverage_10/group_std_mean": 0.20660340785980225,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002574123442173004,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002574123442173004,
"signal/frontier_coverage_15/centered_abs_mean": 0.12744852900505066,
"signal/frontier_coverage_15/group_std_mean": 0.18429652452468873,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022813286632299423,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022813286632299423,
"signal/frontier_coverage_20/centered_abs_mean": 0.07100907564163209,
"signal/frontier_coverage_20/group_std_mean": 0.0970319539308548,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001271062414161861,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001271062414161861,
"signal/frontier_coverage_25/centered_abs_mean": 0.11521650403738022,
"signal/frontier_coverage_25/group_std_mean": 0.1455530434846878,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020623753778636457,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020623753778636457,
"signal/frontier_coverage_5/centered_abs_mean": 0.14425066113471985,
"signal/frontier_coverage_5/group_std_mean": 0.20719496309757232,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025820867624133824,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025820867624133824,
"signal/frontier_ece_reward/centered_abs_mean": 0.00979881975799799,
"signal/frontier_ece_reward/group_std_mean": 0.012976471707224846,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012248524697497487,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012248524697497487,
"step": 185
},
{
"calibration/aurc": 0.16030641773857301,
"calibration/batch_distribution_entropy": 0.7187946309257993,
"calibration/buffer_distribution_entropy": 0.8544406875935249,
"calibration/confidence_entropy": 0.35407681338799846,
"calibration/coverage@0%": 0.06611951878057401,
"calibration/coverage@1%": 0.06611951878057401,
"calibration/coverage@10%": 0.3717757925491453,
"calibration/coverage@15%": 0.43330493174863155,
"calibration/coverage@20%": 0.724023008873238,
"calibration/coverage@25%": 0.8848319190600522,
"calibration/coverage@30%": 0.9640407419495214,
"calibration/coverage@5%": 0.19347886064294592,
"calibration/ece": 0.10143865496049813,
"calibration/mean_confidence": 0.7697417873464006,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009114583333333304,
"completions/max_length": 3704.2,
"completions/max_terminated_length": 3704.2,
"completions/mean_length": 1097.49306640625,
"completions/mean_terminated_length": 1107.6218994140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 304.2,
"epoch": 0.45599430007124914,
"grad_norm": 0.00037333081127144396,
"learning_rate": 5.421686746987952e-07,
"loss": -0.0071,
"num_tokens": 582893589.0,
"reward": 1.0882224082946776,
"reward_std": 0.12903861999511718,
"rewards/accuracy_reward": 0.7315972208976745,
"rewards/brier_reward": 0.8292945265769959,
"rewards/confidence_uniqueness_reward": 0.9284190654754638,
"rewards/format_reward": 0.9907986044883728,
"rewards/frontier_aurc_reward": -0.0020672645885497333,
"rewards/frontier_coverage_1": 0.03171844305470586,
"rewards/frontier_coverage_10": 0.03166983062401414,
"rewards/frontier_coverage_15": 0.03243658747524023,
"rewards/frontier_coverage_20": 0.04914712235331535,
"rewards/frontier_coverage_25": 0.19652676284313203,
"rewards/frontier_coverage_5": 0.03171844305470586,
"rewards/frontier_ece_reward": 0.005333344265818596,
"signal/accuracy_reward/centered_abs_mean": 0.16028645932674407,
"signal/accuracy_reward/group_std_mean": 0.21160895824432374,
"signal/accuracy_reward/group_zero_std_frac": 0.3944444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08014322966337203,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08014322966337203,
"signal/advantage_abs_mean": 0.09539956450462342,
"signal/advantage_pre_scale_abs_mean": 0.09539956450462342,
"signal/advantage_pre_scale_std": 0.16672308146953582,
"signal/advantage_std": 0.16672308146953582,
"signal/brier_reward/centered_abs_mean": 0.13682476282119752,
"signal/brier_reward/group_std_mean": 0.18165784180164338,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01710309535264969,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01710309535264969,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03870115876197815,
"signal/confidence_uniqueness_reward/group_std_mean": 0.056563211232423784,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004837644845247268,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004837644845247268,
"signal/format_reward/centered_abs_mean": 0.015277777425944805,
"signal/format_reward/group_std_mean": 0.02817566618323326,
"signal/format_reward/group_zero_std_frac": 0.8833333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007638888712972402,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007638888712972402,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026700781658291818,
"signal/frontier_aurc_reward/group_std_mean": 0.004712453950196505,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.779439841513522e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.779439841513522e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13049551248550414,
"signal/frontier_coverage_1/group_std_mean": 0.19274420142173768,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002335869614034891,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002335869614034891,
"signal/frontier_coverage_10/centered_abs_mean": 0.12981376349925994,
"signal/frontier_coverage_10/group_std_mean": 0.191838139295578,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002323666214942932,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002323666214942932,
"signal/frontier_coverage_15/centered_abs_mean": 0.11072092205286026,
"signal/frontier_coverage_15/group_std_mean": 0.16516720950603486,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019819044275209306,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019819044275209306,
"signal/frontier_coverage_20/centered_abs_mean": 0.06475888639688492,
"signal/frontier_coverage_20/group_std_mean": 0.08885233402252198,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011591840535402297,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011591840535402297,
"signal/frontier_coverage_25/centered_abs_mean": 0.12515371143817902,
"signal/frontier_coverage_25/group_std_mean": 0.15882596969604493,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002240251423791051,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002240251423791051,
"signal/frontier_coverage_5/centered_abs_mean": 0.13049551248550414,
"signal/frontier_coverage_5/group_std_mean": 0.19274420142173768,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002335869614034891,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002335869614034891,
"signal/frontier_ece_reward/centered_abs_mean": 0.008784758672118188,
"signal/frontier_ece_reward/group_std_mean": 0.011800924316048622,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010980948340147735,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010980948340147735,
"step": 190
},
{
"calibration/aurc": 0.19186302721828583,
"calibration/batch_distribution_entropy": 0.7846974041619017,
"calibration/buffer_distribution_entropy": 0.8455623219637705,
"calibration/confidence_entropy": 0.36892445483210545,
"calibration/coverage@0%": 0.011578947368421053,
"calibration/coverage@1%": 0.011578947368421053,
"calibration/coverage@10%": 0.28315277511204595,
"calibration/coverage@15%": 0.4408358028815268,
"calibration/coverage@20%": 0.5483091688800866,
"calibration/coverage@25%": 0.6822674896809898,
"calibration/coverage@30%": 0.8453754176191314,
"calibration/coverage@5%": 0.088427923594888,
"calibration/ece": 0.12225029143210824,
"calibration/mean_confidence": 0.7202651758038046,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011892361111111116,
"completions/max_length": 3729.2,
"completions/max_terminated_length": 3729.2,
"completions/mean_length": 1130.487158203125,
"completions/mean_terminated_length": 1144.25771484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 268.8,
"epoch": 0.46799415007312406,
"grad_norm": 0.00035123800626024604,
"learning_rate": 3.91566265060241e-07,
"loss": -0.0106,
"num_tokens": 598997665.0,
"reward": 1.0558034181594849,
"reward_std": 0.13249356299638748,
"rewards/accuracy_reward": 0.676649296283722,
"rewards/brier_reward": 0.7974535226821899,
"rewards/confidence_uniqueness_reward": 0.9281538367271424,
"rewards/format_reward": 0.9881076335906982,
"rewards/frontier_aurc_reward": -0.002629127446562052,
"rewards/frontier_coverage_1": 0.0432399183511734,
"rewards/frontier_coverage_10": 0.043241331726312636,
"rewards/frontier_coverage_15": 0.04129153192043304,
"rewards/frontier_coverage_20": 0.04897095337510109,
"rewards/frontier_coverage_25": 0.17610829174518586,
"rewards/frontier_coverage_5": 0.0432399183511734,
"rewards/frontier_ece_reward": 0.005448419880121946,
"signal/accuracy_reward/centered_abs_mean": 0.1535861536860466,
"signal/accuracy_reward/group_std_mean": 0.20312528014183046,
"signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0767930768430233,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0767930768430233,
"signal/advantage_abs_mean": 0.09537107646465301,
"signal/advantage_pre_scale_abs_mean": 0.09537107646465301,
"signal/advantage_pre_scale_std": 0.1679329752922058,
"signal/advantage_std": 0.1679329752922058,
"signal/brier_reward/centered_abs_mean": 0.15391360223293304,
"signal/brier_reward/group_std_mean": 0.2022032171487808,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01923920027911663,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01923920027911663,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.041540400683879854,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06364303082227707,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005192550085484982,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005192550085484982,
"signal/format_reward/centered_abs_mean": 0.020144314132630826,
"signal/format_reward/group_std_mean": 0.03839278891682625,
"signal/format_reward/group_zero_std_frac": 0.8444444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010072157066315413,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010072157066315413,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032725514844059943,
"signal/frontier_aurc_reward/group_std_mean": 0.005476425681263208,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.857866490259767e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.857866490259767e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14388231039047242,
"signal/frontier_coverage_1/group_std_mean": 0.21113406419754027,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00257549318484962,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00257549318484962,
"signal/frontier_coverage_10/centered_abs_mean": 0.14277739822864532,
"signal/frontier_coverage_10/group_std_mean": 0.20962927639484405,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002555715246126056,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002555715246126056,
"signal/frontier_coverage_15/centered_abs_mean": 0.12085210084915161,
"signal/frontier_coverage_15/group_std_mean": 0.17947884798049926,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021632524440065026,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021632524440065026,
"signal/frontier_coverage_20/centered_abs_mean": 0.07110893502831458,
"signal/frontier_coverage_20/group_std_mean": 0.09753008931875229,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012728499248623848,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012728499248623848,
"signal/frontier_coverage_25/centered_abs_mean": 0.13257131576538086,
"signal/frontier_coverage_25/group_std_mean": 0.16897372305393218,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023730265442281962,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023730265442281962,
"signal/frontier_coverage_5/centered_abs_mean": 0.14388231039047242,
"signal/frontier_coverage_5/group_std_mean": 0.21113406419754027,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00257549318484962,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00257549318484962,
"signal/frontier_ece_reward/centered_abs_mean": 0.009696776419878006,
"signal/frontier_ece_reward/group_std_mean": 0.013027152419090271,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012120970524847508,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012120970524847508,
"step": 195
},
{
"calibration/aurc": 0.15151234797804142,
"calibration/batch_distribution_entropy": 0.735099965996475,
"calibration/buffer_distribution_entropy": 0.8387114189183977,
"calibration/confidence_entropy": 0.37467308471707284,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.33631391158785745,
"calibration/coverage@15%": 0.6785776451572796,
"calibration/coverage@20%": 0.7966761987649715,
"calibration/coverage@25%": 0.8593974056115048,
"calibration/coverage@30%": 0.9433862433862433,
"calibration/coverage@5%": 0.13511235377562142,
"calibration/ece": 0.10459575666056162,
"calibration/mean_confidence": 0.7707032420511629,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009114583333333348,
"completions/max_length": 3905.4,
"completions/max_terminated_length": 3905.4,
"completions/mean_length": 1109.669580078125,
"completions/mean_terminated_length": 1119.9466552734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 277.4,
"epoch": 0.47999400007499904,
"grad_norm": 0.000335763645125553,
"learning_rate": 2.409638554216868e-07,
"loss": -0.0077,
"num_tokens": 614848866.0,
"reward": 1.071366262435913,
"reward_std": 0.1267540842294693,
"rewards/accuracy_reward": 0.6993055582046509,
"rewards/brier_reward": 0.8146753191947937,
"rewards/confidence_uniqueness_reward": 0.9305232167243958,
"rewards/format_reward": 0.990625,
"rewards/frontier_aurc_reward": -0.002462388901039958,
"rewards/frontier_coverage_1": 0.04432640373706818,
"rewards/frontier_coverage_10": 0.04419338628649712,
"rewards/frontier_coverage_15": 0.04251908585429191,
"rewards/frontier_coverage_20": 0.05172077566385269,
"rewards/frontier_coverage_25": 0.19664104282855988,
"rewards/frontier_coverage_5": 0.04432640373706818,
"rewards/frontier_ece_reward": 0.005684023071080446,
"signal/accuracy_reward/centered_abs_mean": 0.14978298395872117,
"signal/accuracy_reward/group_std_mean": 0.1975135862827301,
"signal/accuracy_reward/group_zero_std_frac": 0.43888888955116273,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07489149197936058,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07489149197936058,
"signal/advantage_abs_mean": 0.09207341372966767,
"signal/advantage_pre_scale_abs_mean": 0.09207341372966767,
"signal/advantage_pre_scale_std": 0.1636202573776245,
"signal/advantage_std": 0.1636202573776245,
"signal/brier_reward/centered_abs_mean": 0.14286952763795852,
"signal/brier_reward/group_std_mean": 0.18820186257362365,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017858690954744814,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017858690954744814,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03856443092226982,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05923122763633728,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0048205538652837275,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0048205538652837275,
"signal/format_reward/centered_abs_mean": 0.016167534701526164,
"signal/format_reward/group_std_mean": 0.03279493264853954,
"signal/format_reward/group_zero_std_frac": 0.8555555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008083767350763082,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008083767350763082,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003109201230108738,
"signal/frontier_aurc_reward/group_std_mean": 0.005194711685180664,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.565470055444166e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.565470055444166e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1378859430551529,
"signal/frontier_coverage_1/group_std_mean": 0.20053677260875702,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024681583512574435,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024681583512574435,
"signal/frontier_coverage_10/centered_abs_mean": 0.13636807948350907,
"signal/frontier_coverage_10/group_std_mean": 0.19854426085948945,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024409884586930274,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024409884586930274,
"signal/frontier_coverage_15/centered_abs_mean": 0.11440031677484512,
"signal/frontier_coverage_15/group_std_mean": 0.16812679767608643,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002047765627503395,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002047765627503395,
"signal/frontier_coverage_20/centered_abs_mean": 0.06679626852273941,
"signal/frontier_coverage_20/group_std_mean": 0.09091974049806595,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011956531554460526,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011956531554460526,
"signal/frontier_coverage_25/centered_abs_mean": 0.13488344550132753,
"signal/frontier_coverage_25/group_std_mean": 0.17190809845924376,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024144135415554045,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024144135415554045,
"signal/frontier_coverage_5/centered_abs_mean": 0.1378859430551529,
"signal/frontier_coverage_5/group_std_mean": 0.20053677260875702,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024681583512574435,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024681583512574435,
"signal/frontier_ece_reward/centered_abs_mean": 0.009192906878888607,
"signal/frontier_ece_reward/group_std_mean": 0.01221153773367405,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011491133598610758,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011491133598610758,
"step": 200
},
{
"epoch": 0.47999400007499904,
"eval_calibration/aurc": 0.15298984535701007,
"eval_calibration/batch_distribution_entropy": 0.6891585641828614,
"eval_calibration/buffer_distribution_entropy": 0.8339226172973552,
"eval_calibration/confidence_entropy": 0.34052887972179385,
"eval_calibration/coverage@0%": 0.14700940860215053,
"eval_calibration/coverage@1%": 0.14700940860215053,
"eval_calibration/coverage@10%": 0.4490927419354838,
"eval_calibration/coverage@15%": 0.6940524193548386,
"eval_calibration/coverage@20%": 0.827116935483871,
"eval_calibration/coverage@25%": 0.8949932795698925,
"eval_calibration/coverage@30%": 0.9734543010752689,
"eval_calibration/coverage@5%": 0.14700940860215053,
"eval_calibration/ece": 0.1637371228983639,
"eval_calibration/mean_confidence": 0.7425752090908834,
"eval_completions/clipped_ratio": 0.016319444444444442,
"eval_completions/max_length": 3322.0,
"eval_completions/max_terminated_length": 3322.0,
"eval_completions/mean_length": 1099.1554361979167,
"eval_completions/mean_terminated_length": 1117.191874186198,
"eval_completions/min_length": 0.0,
"eval_completions/min_terminated_length": 364.8333333333333,
"eval_loss": 0.0,
"eval_num_tokens": 614848866.0,
"eval_reward": 1.0577454765637715,
"eval_reward_std": 0.2678757707277934,
"eval_rewards/accuracy_reward": 0.6918402711550394,
"eval_rewards/brier_reward": 0.8075468341509501,
"eval_rewards/confidence_uniqueness_reward": 0.8762139777342478,
"eval_rewards/format_reward": 0.9861111044883728,
"eval_rewards/frontier_aurc_reward": -0.002283926723369708,
"eval_rewards/frontier_coverage_1": 0.04415246595939001,
"eval_rewards/frontier_coverage_10": 0.04416414389076332,
"eval_rewards/frontier_coverage_15": 0.04174827644601464,
"eval_rewards/frontier_coverage_20": 0.051496884475151695,
"eval_rewards/frontier_coverage_25": 0.20257077117760977,
"eval_rewards/frontier_coverage_5": 0.04415246595939001,
"eval_rewards/frontier_ece_reward": 0.005393590700502197,
"eval_runtime": 213.6534,
"eval_samples_per_second": 4.68,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4120551198720932,
"eval_signal/accuracy_reward/group_std_mean": 0.459818700949351,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2060275599360466,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2060275599360466,
"eval_signal/advantage_abs_mean": 0.2272669697801272,
"eval_signal/advantage_pre_scale_abs_mean": 0.2272669697801272,
"eval_signal/advantage_pre_scale_std": 0.2672336275378863,
"eval_signal/advantage_std": 0.2672336275378863,
"eval_signal/brier_reward/centered_abs_mean": 0.23803439736366272,
"eval_signal/brier_reward/group_std_mean": 0.30198956032594043,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02975429967045784,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02975429967045784,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06008566605548064,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09701731304327647,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00751070825693508,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00751070825693508,
"eval_signal/format_reward/centered_abs_mean": 0.026475694496184587,
"eval_signal/format_reward/group_std_mean": 0.06907285718868177,
"eval_signal/format_reward/group_zero_std_frac": 0.6388889054457346,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013237847248092294,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.013237847248092294,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003976293024607003,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008325919586544236,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.117564200598281e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.117564200598281e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.1956762745976448,
"eval_signal/frontier_coverage_1/group_std_mean": 0.32742465535799664,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035026053277154765,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035026053277154765,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.19371677935123444,
"eval_signal/frontier_coverage_10/group_std_mean": 0.3244449843962987,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003467530244961381,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003467530244961381,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.16138583918412527,
"eval_signal/frontier_coverage_15/group_std_mean": 0.27520785232385,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028888065523157516,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028888065523157516,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.08761031304796536,
"eval_signal/frontier_coverage_20/group_std_mean": 0.13085574780901274,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015682245721109211,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015682245721109211,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2675568262736003,
"eval_signal/frontier_coverage_25/group_std_mean": 0.3178121993939082,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00478926720097661,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00478926720097661,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.1956762745976448,
"eval_signal/frontier_coverage_5/group_std_mean": 0.32742465535799664,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035026053277154765,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035026053277154765,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.010918460320681334,
"eval_signal/frontier_ece_reward/group_std_mean": 0.0168185291501383,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013648075400851667,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013648075400851667,
"eval_steps_per_second": 0.028,
"step": 200
},
{
"calibration/aurc": 0.18313683195958363,
"calibration/batch_distribution_entropy": 0.7309309125681636,
"calibration/buffer_distribution_entropy": 0.8321117344650947,
"calibration/confidence_entropy": 0.36200907857604947,
"calibration/coverage@0%": 0.0058823529411764705,
"calibration/coverage@1%": 0.0058823529411764705,
"calibration/coverage@10%": 0.188358492065308,
"calibration/coverage@15%": 0.5072439560443204,
"calibration/coverage@20%": 0.6427671877795685,
"calibration/coverage@25%": 0.8473311156960994,
"calibration/coverage@30%": 0.9155880247589371,
"calibration/coverage@5%": 0.0058823529411764705,
"calibration/ece": 0.10859277111780778,
"calibration/mean_confidence": 0.7655174734872644,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011545138888888862,
"completions/max_length": 3830.8,
"completions/max_terminated_length": 3830.8,
"completions/mean_length": 1124.1480224609375,
"completions/mean_terminated_length": 1137.24013671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 313.4,
"epoch": 0.491993850076874,
"grad_norm": 0.00028088607359677553,
"learning_rate": 9.036144578313253e-08,
"loss": -0.0101,
"num_tokens": 630865003.0,
"reward": 1.0949723958969115,
"reward_std": 0.12291131764650345,
"rewards/accuracy_reward": 0.7439236164093017,
"rewards/brier_reward": 0.8352924466133118,
"rewards/confidence_uniqueness_reward": 0.9271109342575073,
"rewards/format_reward": 0.98828125,
"rewards/frontier_aurc_reward": -0.0017190412618219852,
"rewards/frontier_coverage_1": 0.03267882689833641,
"rewards/frontier_coverage_10": 0.03304030448198318,
"rewards/frontier_coverage_15": 0.03327622413635254,
"rewards/frontier_coverage_20": 0.057910379767417905,
"rewards/frontier_coverage_25": 0.25257430374622347,
"rewards/frontier_coverage_5": 0.03267882689833641,
"rewards/frontier_ece_reward": 0.005485412012785673,
"signal/accuracy_reward/centered_abs_mean": 0.1423394113779068,
"signal/accuracy_reward/group_std_mean": 0.1927516996860504,
"signal/accuracy_reward/group_zero_std_frac": 0.4333333373069763,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0711697056889534,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0711697056889534,
"signal/advantage_abs_mean": 0.08618586659431457,
"signal/advantage_pre_scale_abs_mean": 0.08618586659431457,
"signal/advantage_pre_scale_std": 0.16105275750160217,
"signal/advantage_std": 0.16105275750160217,
"signal/brier_reward/centered_abs_mean": 0.13963095247745513,
"signal/brier_reward/group_std_mean": 0.18516895473003386,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01745386905968189,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01745386905968189,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04072900265455246,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06390283033251762,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005091125331819058,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005091125331819058,
"signal/format_reward/centered_abs_mean": 0.019070095382630825,
"signal/format_reward/group_std_mean": 0.03828651420772076,
"signal/format_reward/group_zero_std_frac": 0.8333333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009535047691315413,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009535047691315413,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022460675798356535,
"signal/frontier_aurc_reward/group_std_mean": 0.003971707401797175,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.020460764877498e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.020460764877498e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1471869170665741,
"signal/frontier_coverage_1/group_std_mean": 0.21187746226787568,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026346457190811632,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026346457190811632,
"signal/frontier_coverage_10/centered_abs_mean": 0.14501943588256835,
"signal/frontier_coverage_10/group_std_mean": 0.2089345246553421,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025958478916436436,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025958478916436436,
"signal/frontier_coverage_15/centered_abs_mean": 0.11886921375989914,
"signal/frontier_coverage_15/group_std_mean": 0.17266935706138611,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021277590189129115,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021277590189129115,
"signal/frontier_coverage_20/centered_abs_mean": 0.07114373296499252,
"signal/frontier_coverage_20/group_std_mean": 0.09532740265130997,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012734727468341589,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012734727468341589,
"signal/frontier_coverage_25/centered_abs_mean": 0.1383225828409195,
"signal/frontier_coverage_25/group_std_mean": 0.1785325288772583,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002475974103435874,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002475974103435874,
"signal/frontier_coverage_5/centered_abs_mean": 0.1471869170665741,
"signal/frontier_coverage_5/group_std_mean": 0.21187746226787568,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026346457190811632,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026346457190811632,
"signal/frontier_ece_reward/centered_abs_mean": 0.009521047584712505,
"signal/frontier_ece_reward/group_std_mean": 0.012699404545128346,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011901309480890632,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011901309480890632,
"step": 205
},
{
"calibration/aurc": 0.15099192703946585,
"calibration/batch_distribution_entropy": 0.7362200352954403,
"calibration/buffer_distribution_entropy": 0.8299266209675906,
"calibration/confidence_entropy": 0.3875184856441491,
"calibration/coverage@0%": 0.03849518810148731,
"calibration/coverage@1%": 0.03849518810148731,
"calibration/coverage@10%": 0.2589676290463692,
"calibration/coverage@15%": 0.4965119491642492,
"calibration/coverage@20%": 0.8815515350877193,
"calibration/coverage@25%": 0.9703399122807017,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.07611548556430446,
"calibration/ece": 0.08016838343231748,
"calibration/mean_confidence": 0.771572898167351,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006655092592592597,
"completions/max_length": 3825.3333333333335,
"completions/max_terminated_length": 3825.3333333333335,
"completions/mean_length": 1118.4932250976562,
"completions/mean_terminated_length": 1126.0267740885417,
"completions/min_length": 0.0,
"completions/min_terminated_length": 286.6666666666667,
"epoch": 0.49919376007799904,
"num_tokens": 640462076.0,
"reward": 1.0744483868281047,
"reward_std": 0.12696021795272827,
"rewards/accuracy_reward": 0.7048611044883728,
"rewards/brier_reward": 0.8086513876914978,
"rewards/confidence_uniqueness_reward": 0.934039036432902,
"rewards/format_reward": 0.9932002226511637,
"rewards/frontier_aurc_reward": -0.002406407419281701,
"rewards/frontier_coverage_1": 0.030984345202644665,
"rewards/frontier_coverage_10": 0.030985131859779358,
"rewards/frontier_coverage_15": 0.03134462299446265,
"rewards/frontier_coverage_20": 0.04987387855847677,
"rewards/frontier_coverage_25": 0.21809764703114828,
"rewards/frontier_coverage_5": 0.030984345202644665,
"rewards/frontier_ece_reward": 0.004822776031990846,
"signal/accuracy_reward/centered_abs_mean": 0.1598126416405042,
"signal/accuracy_reward/group_std_mean": 0.2098775009314219,
"signal/accuracy_reward/group_zero_std_frac": 0.407407412926356,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0799063208202521,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0799063208202521,
"signal/advantage_abs_mean": 0.09329408903916676,
"signal/advantage_pre_scale_abs_mean": 0.09329408903916676,
"signal/advantage_pre_scale_std": 0.16142626603444418,
"signal/advantage_std": 0.16142626603444418,
"signal/brier_reward/centered_abs_mean": 0.14758913467327753,
"signal/brier_reward/group_std_mean": 0.18945661187171936,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01844864183415969,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01844864183415969,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03421806792418162,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05302715301513672,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004277258490522702,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004277258490522702,
"signal/format_reward/centered_abs_mean": 0.01250542514026165,
"signal/format_reward/group_std_mean": 0.027382840712865193,
"signal/format_reward/group_zero_std_frac": 0.875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006252712570130825,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006252712570130825,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030548664896438518,
"signal/frontier_aurc_reward/group_std_mean": 0.005336549288282792,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.468210899077045e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.468210899077045e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14464224129915237,
"signal/frontier_coverage_1/group_std_mean": 0.20606282353401184,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002589096004764239,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002589096004764239,
"signal/frontier_coverage_10/centered_abs_mean": 0.14266284555196762,
"signal/frontier_coverage_10/group_std_mean": 0.20341384410858154,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002553664923955997,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002553664923955997,
"signal/frontier_coverage_15/centered_abs_mean": 0.11402915418148041,
"signal/frontier_coverage_15/group_std_mean": 0.1643607368071874,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020411216343442598,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020411216343442598,
"signal/frontier_coverage_20/centered_abs_mean": 0.07020012413462003,
"signal/frontier_coverage_20/group_std_mean": 0.09288557122151057,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001256582133161525,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001256582133161525,
"signal/frontier_coverage_25/centered_abs_mean": 0.15206469098726907,
"signal/frontier_coverage_25/group_std_mean": 0.19445708394050598,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027219578623771667,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027219578623771667,
"signal/frontier_coverage_5/centered_abs_mean": 0.14464224129915237,
"signal/frontier_coverage_5/group_std_mean": 0.20606282353401184,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002589096004764239,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002589096004764239,
"signal/frontier_ece_reward/centered_abs_mean": 0.009401047912736734,
"signal/frontier_ece_reward/group_std_mean": 0.012525786645710468,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011751309890920918,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011751309890920918,
"step": 208,
"total_flos": 0.0,
"train_loss": -0.015914715025036667,
"train_runtime": 51338.4996,
"train_samples_per_second": 0.292,
"train_steps_per_second": 0.004
}
],
"logging_steps": 5,
"max_steps": 208,
"num_input_tokens_seen": 640462076,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}