9532 lines
587 KiB
JSON
9532 lines
587 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9984,
|
|
"eval_steps": 50,
|
|
"global_step": 312,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.625441469460826,
|
|
"calibration/batch_distribution_entropy": 0.6561097488908417,
|
|
"calibration/confidence_entropy": 0.34487710695615964,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.49470016071002154,
|
|
"calibration/mean_confidence": 0.7911784320650452,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.03603515625,
|
|
"completions/max_length": 1509.6,
|
|
"completions/max_terminated_length": 1509.6,
|
|
"completions/mean_length": 214.22236328125,
|
|
"completions/mean_terminated_length": 222.21807250976562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 1.8,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.025879332795739174,
|
|
"learning_rate": 3.1249999999999997e-07,
|
|
"loss": 0.0125,
|
|
"num_tokens": 17037669.0,
|
|
"reward": 0.5616797089576722,
|
|
"reward_std": 0.4342596590518951,
|
|
"rewards/accuracy_reward": 0.22119140625,
|
|
"rewards/batch_coverage_0": 0.05096393823623657,
|
|
"rewards/batch_coverage_1": 0.05096393823623657,
|
|
"rewards/batch_coverage_10": 0.07000155597925187,
|
|
"rewards/batch_coverage_15": 0.0811154417693615,
|
|
"rewards/batch_coverage_20": 0.09202761203050613,
|
|
"rewards/batch_coverage_25": 0.10896564424037933,
|
|
"rewards/batch_coverage_5": 0.062107541412115094,
|
|
"rewards/brier_reward": 0.37876845598220826,
|
|
"rewards/confidence_uniqueness_reward": 0.4908894419670105,
|
|
"rewards/format_reward": 0.68837890625,
|
|
"rewards/frontier_aurc_reward": 0.30465003848075867,
|
|
"rewards/frontier_ece_reward": 0.30465003848075867,
|
|
"rewards/frontier_entropy_batch_reward": -0.6595894694328308,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.242242431640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.284113472700119,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1211212158203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1211212158203125,
|
|
"signal/advantage_abs_mean": 0.3647449553012848,
|
|
"signal/advantage_pre_scale_abs_mean": 0.3647449553012848,
|
|
"signal/advantage_pre_scale_std": 0.4435017466545105,
|
|
"signal/advantage_std": 0.4435017466545105,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.07645872831344605,
|
|
"signal/batch_coverage_0/group_std_mean": 0.1278139054775238,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.007645872887223959,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.007645872887223959,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.07645872831344605,
|
|
"signal/batch_coverage_1/group_std_mean": 0.1278139054775238,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.007645872887223959,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.007645872887223959,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.08913887441158294,
|
|
"signal/batch_coverage_10/group_std_mean": 0.1408896714448929,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.008913887571543455,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.008913887571543455,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.09963576942682266,
|
|
"signal/batch_coverage_15/group_std_mean": 0.15215918719768523,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.00996357724070549,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.00996357724070549,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.11348118036985397,
|
|
"signal/batch_coverage_20/group_std_mean": 0.16662466824054717,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.011348118260502815,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.011348118260502815,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.13774538338184356,
|
|
"signal/batch_coverage_25/group_std_mean": 0.19362023174762727,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013774538971483707,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.013774538971483707,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.08416972756385803,
|
|
"signal/batch_coverage_5/group_std_mean": 0.1365703135728836,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.008416973147541284,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.008416973147541284,
|
|
"signal/brier_reward/centered_abs_mean": 0.32081450819969176,
|
|
"signal/brier_reward/group_std_mean": 0.3656565546989441,
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.032081450521945956,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.032081450521945956,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2961998999118805,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.34802345037460325,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029619990289211272,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.029619990289211272,
|
|
"signal/format_reward/centered_abs_mean": 0.400396728515625,
|
|
"signal/format_reward/group_std_mean": 0.4517782092094421,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2001983642578125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.2001983642578125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.29386022686958313,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3446810841560364,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0036732527427375317,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0036732527427375317,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.29386022686958313,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3446810841560364,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.029386021941900254,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.029386021941900254,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.42292317748069763,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.47080921530723574,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04229231923818588,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04229231923818588,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6826533198028557,
|
|
"calibration/batch_distribution_entropy": 0.6420826904038014,
|
|
"calibration/confidence_entropy": 0.3419187196643387,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.5423836619103499,
|
|
"calibration/mean_confidence": 0.7939850831364194,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0357421875,
|
|
"completions/max_length": 1513.4,
|
|
"completions/max_terminated_length": 1513.4,
|
|
"completions/mean_length": 203.18544921875,
|
|
"completions/mean_terminated_length": 210.78201904296876,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.018445724621415138,
|
|
"learning_rate": 6.249999999999999e-07,
|
|
"loss": 0.0027,
|
|
"num_tokens": 34218640.0,
|
|
"reward": 0.5817601561546326,
|
|
"reward_std": 0.4154947519302368,
|
|
"rewards/accuracy_reward": 0.210546875,
|
|
"rewards/batch_coverage_0": 0.056951449066400525,
|
|
"rewards/batch_coverage_1": 0.056951449066400525,
|
|
"rewards/batch_coverage_10": 0.07880671173334122,
|
|
"rewards/batch_coverage_15": 0.096053147315979,
|
|
"rewards/batch_coverage_20": 0.10706809908151627,
|
|
"rewards/batch_coverage_25": 0.1138889878988266,
|
|
"rewards/batch_coverage_5": 0.06925816759467125,
|
|
"rewards/brier_reward": 0.3806018054485321,
|
|
"rewards/confidence_uniqueness_reward": 0.5155683040618897,
|
|
"rewards/format_reward": 0.7294921875,
|
|
"rewards/frontier_aurc_reward": 0.3003185033798218,
|
|
"rewards/frontier_ece_reward": 0.3003185033798218,
|
|
"rewards/frontier_entropy_batch_reward": -0.6956000924110413,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.22080078125,
|
|
"signal/accuracy_reward/group_std_mean": 0.2653929114341736,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.34375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.110400390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.110400390625,
|
|
"signal/advantage_abs_mean": 0.33845388889312744,
|
|
"signal/advantage_pre_scale_abs_mean": 0.33845388889312744,
|
|
"signal/advantage_pre_scale_std": 0.4272084295749664,
|
|
"signal/advantage_std": 0.4272084295749664,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.0791116937994957,
|
|
"signal/batch_coverage_0/group_std_mean": 0.12949045449495317,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.007911169622093438,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.007911169622093438,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.0791116937994957,
|
|
"signal/batch_coverage_1/group_std_mean": 0.12949045449495317,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.007911169622093438,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.007911169622093438,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.09277822971343994,
|
|
"signal/batch_coverage_10/group_std_mean": 0.14479392766952515,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.009277822449803352,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.009277822449803352,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.10906872600317001,
|
|
"signal/batch_coverage_15/group_std_mean": 0.1635884314775467,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.010906872153282166,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.010906872153282166,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.1215769112110138,
|
|
"signal/batch_coverage_20/group_std_mean": 0.17692172527313232,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012157691456377507,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.012157691456377507,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.13173424154520036,
|
|
"signal/batch_coverage_25/group_std_mean": 0.1880470871925354,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013173424638807774,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.013173424638807774,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.08721371293067932,
|
|
"signal/batch_coverage_5/group_std_mean": 0.13860590159893035,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.008721371926367284,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.008721371926367284,
|
|
"signal/brier_reward/centered_abs_mean": 0.305008465051651,
|
|
"signal/brier_reward/group_std_mean": 0.35352975130081177,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03050084561109543,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03050084561109543,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2797845423221588,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.33646575808525087,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027978454902768134,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.027978454902768134,
|
|
"signal/format_reward/centered_abs_mean": 0.368994140625,
|
|
"signal/format_reward/group_std_mean": 0.432600337266922,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1844970703125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1844970703125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.27742584943771365,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.33283730745315554,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003467823192477226,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003467823192477226,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.27742584943771365,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.33283730745315554,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02774258553981781,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02774258553981781,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3977131128311157,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4558208167552948,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03977131098508835,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03977131098508835,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5891158731428874,
|
|
"calibration/batch_distribution_entropy": 0.6520017602661853,
|
|
"calibration/buffer_distribution_entropy": 0.6638543025012997,
|
|
"calibration/confidence_entropy": 0.33411021460596607,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.46858668054755875,
|
|
"calibration/mean_confidence": 0.7970036765667995,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0201171875,
|
|
"completions/max_length": 1450.6,
|
|
"completions/max_terminated_length": 1450.6,
|
|
"completions/mean_length": 174.7373046875,
|
|
"completions/mean_terminated_length": 178.42660827636718,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.6,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.04330306500196457,
|
|
"learning_rate": 9.374999999999999e-07,
|
|
"loss": 0.0076,
|
|
"num_tokens": 51056686.0,
|
|
"reward": 0.7281944155693054,
|
|
"reward_std": 0.36009618639945984,
|
|
"rewards/accuracy_reward": 0.271484375,
|
|
"rewards/batch_coverage_0": 0.09881696254014968,
|
|
"rewards/batch_coverage_1": 0.09881696254014968,
|
|
"rewards/batch_coverage_10": 0.13585815876722335,
|
|
"rewards/batch_coverage_15": 0.15109447687864302,
|
|
"rewards/batch_coverage_20": 0.1667906880378723,
|
|
"rewards/batch_coverage_25": 0.18214811384677887,
|
|
"rewards/batch_coverage_5": 0.11958295553922653,
|
|
"rewards/brier_reward": 0.48470067977905273,
|
|
"rewards/confidence_uniqueness_reward": 0.6335558533668518,
|
|
"rewards/format_reward": 0.87490234375,
|
|
"rewards/frontier_aurc_reward": 0.28310426576063036,
|
|
"rewards/frontier_ece_reward": 0.27106482833623885,
|
|
"rewards/frontier_entropy_batch_reward": -0.8278069615364074,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19892578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.24604250788688659,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.359375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.099462890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.099462890625,
|
|
"signal/advantage_abs_mean": 0.27411369383335116,
|
|
"signal/advantage_pre_scale_abs_mean": 0.27411369383335116,
|
|
"signal/advantage_pre_scale_std": 0.37025115489959715,
|
|
"signal/advantage_std": 0.37025115489959715,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.10894110947847366,
|
|
"signal/batch_coverage_0/group_std_mean": 0.1668031245470047,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.010894111171364785,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.010894111171364785,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.10894110947847366,
|
|
"signal/batch_coverage_1/group_std_mean": 0.1668031245470047,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.010894111171364785,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.010894111171364785,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.1243826374411583,
|
|
"signal/batch_coverage_10/group_std_mean": 0.18446942567825317,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.012438264302909374,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.012438264302909374,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.13416066765785217,
|
|
"signal/batch_coverage_15/group_std_mean": 0.19630922377109528,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013416066579520703,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.013416066579520703,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.14895718097686766,
|
|
"signal/batch_coverage_20/group_std_mean": 0.2133055567741394,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01489571835845709,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.01489571835845709,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.16923868358135224,
|
|
"signal/batch_coverage_25/group_std_mean": 0.23628869652748108,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.016923869028687477,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.016923869028687477,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.11639369875192643,
|
|
"signal/batch_coverage_5/group_std_mean": 0.17475835084915162,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.011639369651675224,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.011639369651675224,
|
|
"signal/brier_reward/centered_abs_mean": 0.2798484146595001,
|
|
"signal/brier_reward/group_std_mean": 0.33471320271492006,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02798484228551388,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02798484228551388,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.21195151209831237,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.27575126886367796,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021195151284337042,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.021195151284337042,
|
|
"signal/format_reward/centered_abs_mean": 0.204010009765625,
|
|
"signal/format_reward/group_std_mean": 0.30632642805576327,
|
|
"signal/format_reward/group_zero_std_frac": 0.053125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1020050048828125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1020050048828125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.2093428259715438,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.25139847891405226,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0026167853160586675,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0026167853160586675,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.2434532254934311,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.29156210720539094,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.024345323070883752,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.024345323070883752,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2686063975095749,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37301525473594666,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026860639825463294,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026860639825463294,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.522754084713956,
|
|
"calibration/batch_distribution_entropy": 0.738960728697382,
|
|
"calibration/buffer_distribution_entropy": 0.6672639500396118,
|
|
"calibration/confidence_entropy": 0.3410296290421736,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.35869172803053007,
|
|
"calibration/mean_confidence": 0.7331278499437659,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00751953125,
|
|
"completions/max_length": 1419.2,
|
|
"completions/max_terminated_length": 1419.2,
|
|
"completions/mean_length": 133.2443359375,
|
|
"completions/mean_terminated_length": 134.2860855102539,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.01446569710969925,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0051,
|
|
"num_tokens": 67339508.0,
|
|
"reward": 0.8069659948349,
|
|
"reward_std": 0.29989936351776125,
|
|
"rewards/accuracy_reward": 0.32421875,
|
|
"rewards/batch_coverage_0": 0.12223946899175644,
|
|
"rewards/batch_coverage_1": 0.12223946899175644,
|
|
"rewards/batch_coverage_10": 0.18216916620731355,
|
|
"rewards/batch_coverage_15": 0.19795053601264953,
|
|
"rewards/batch_coverage_20": 0.2243308424949646,
|
|
"rewards/batch_coverage_25": 0.23708344697952272,
|
|
"rewards/batch_coverage_5": 0.14572731405496597,
|
|
"rewards/brier_reward": 0.5832386374473572,
|
|
"rewards/confidence_uniqueness_reward": 0.7324810743331909,
|
|
"rewards/format_reward": 0.96865234375,
|
|
"rewards/frontier_aurc_reward": -0.006812120229005814,
|
|
"rewards/frontier_ece_reward": -0.04467292013578117,
|
|
"rewards/frontier_entropy_batch_reward": -0.8966308832168579,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.20263671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.2531333029270172,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.34375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.101318359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.101318359375,
|
|
"signal/advantage_abs_mean": 0.2353407621383667,
|
|
"signal/advantage_pre_scale_abs_mean": 0.2353407621383667,
|
|
"signal/advantage_pre_scale_std": 0.3037436902523041,
|
|
"signal/advantage_std": 0.3037436902523041,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.16245611310005187,
|
|
"signal/batch_coverage_0/group_std_mean": 0.23634712100028993,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01624561119824648,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.01624561119824648,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.16245611310005187,
|
|
"signal/batch_coverage_1/group_std_mean": 0.23634712100028993,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01624561119824648,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.01624561119824648,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.18128455579280853,
|
|
"signal/batch_coverage_10/group_std_mean": 0.2564110219478607,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.018128455430269242,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.018128455430269242,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.18811092674732208,
|
|
"signal/batch_coverage_15/group_std_mean": 0.26361055076122286,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.018811094015836714,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.018811094015836714,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.20965143740177156,
|
|
"signal/batch_coverage_20/group_std_mean": 0.2887492120265961,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.020965144410729407,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.020965144410729407,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.22187889814376832,
|
|
"signal/batch_coverage_25/group_std_mean": 0.30295662879943847,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.022187890857458113,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.022187890857458113,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1677585393190384,
|
|
"signal/batch_coverage_5/group_std_mean": 0.24008685052394868,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01677585393190384,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.01677585393190384,
|
|
"signal/brier_reward/centered_abs_mean": 0.267183381319046,
|
|
"signal/brier_reward/group_std_mean": 0.32429797053337095,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026718338951468468,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.026718338951468468,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.15027010142803193,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.19157288372516632,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015027010440826416,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015027010440826416,
|
|
"signal/format_reward/centered_abs_mean": 0.053790283203125,
|
|
"signal/format_reward/group_std_mean": 0.11199843138456345,
|
|
"signal/format_reward/group_zero_std_frac": 0.496875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0268951416015625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0268951416015625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.004937331937253475,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.006630830001085997,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.171664863359183e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.171664863359183e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.14534821808338166,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.17288282215595246,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.01453482247889042,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.01453482247889042,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17549372911453248,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.30022566914558413,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01754937395453453,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01754937395453453,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6308852649276485,
|
|
"calibration/batch_distribution_entropy": 0.7947961294672216,
|
|
"calibration/buffer_distribution_entropy": 0.7157307539372637,
|
|
"calibration/confidence_entropy": 0.28688856649436556,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.2848839505115735,
|
|
"calibration/mean_confidence": 0.4834632274141229,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0044921875,
|
|
"completions/max_length": 765.8,
|
|
"completions/max_terminated_length": 765.8,
|
|
"completions/mean_length": 108.28251953125,
|
|
"completions/mean_terminated_length": 108.77434997558593,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 13.4,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.03482503816485405,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.004,
|
|
"num_tokens": 83381473.0,
|
|
"reward": 0.9376622438430786,
|
|
"reward_std": 0.31385440230369566,
|
|
"rewards/accuracy_reward": 0.32197265625,
|
|
"rewards/batch_coverage_0": 0.2897476017475128,
|
|
"rewards/batch_coverage_1": 0.2897476017475128,
|
|
"rewards/batch_coverage_10": 0.3420256972312927,
|
|
"rewards/batch_coverage_15": 0.3555846869945526,
|
|
"rewards/batch_coverage_20": 0.3710071802139282,
|
|
"rewards/batch_coverage_25": 0.37447254061698915,
|
|
"rewards/batch_coverage_5": 0.3116483360528946,
|
|
"rewards/brier_reward": 0.6967913031578064,
|
|
"rewards/confidence_uniqueness_reward": 0.6951715230941773,
|
|
"rewards/format_reward": 0.9861328125,
|
|
"rewards/frontier_aurc_reward": -0.005871403589844703,
|
|
"rewards/frontier_ece_reward": -0.014616276603192091,
|
|
"rewards/frontier_entropy_batch_reward": -0.8747512936592102,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.191436767578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.2405393362045288,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.365625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0957183837890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0957183837890625,
|
|
"signal/advantage_abs_mean": 0.26043112874031066,
|
|
"signal/advantage_pre_scale_abs_mean": 0.26043112874031066,
|
|
"signal/advantage_pre_scale_std": 0.3133840084075928,
|
|
"signal/advantage_std": 0.3133840084075928,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.30262792110443115,
|
|
"signal/batch_coverage_0/group_std_mean": 0.3807533621788025,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.030262791365385056,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.030262791365385056,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.30262792110443115,
|
|
"signal/batch_coverage_1/group_std_mean": 0.3807533621788025,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.030262791365385056,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.030262791365385056,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.317511785030365,
|
|
"signal/batch_coverage_10/group_std_mean": 0.39243263006210327,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.031751178577542304,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.031751178577542304,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.3235657215118408,
|
|
"signal/batch_coverage_15/group_std_mean": 0.39802160263061526,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.032356572523713115,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.032356572523713115,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.333120733499527,
|
|
"signal/batch_coverage_20/group_std_mean": 0.4084656834602356,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.033312073722481725,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.033312073722481725,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.3283375442028046,
|
|
"signal/batch_coverage_25/group_std_mean": 0.4016383528709412,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.032833756506443025,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.032833756506443025,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.30745909810066224,
|
|
"signal/batch_coverage_5/group_std_mean": 0.38337884545326234,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.030745909363031388,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.030745909363031388,
|
|
"signal/brier_reward/centered_abs_mean": 0.2754803538322449,
|
|
"signal/brier_reward/group_std_mean": 0.33621970415115354,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027548035979270934,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.027548035979270934,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.19807184338569642,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.22963170409202577,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01980718448758125,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01980718448758125,
|
|
"signal/format_reward/centered_abs_mean": 0.0231201171875,
|
|
"signal/format_reward/group_std_mean": 0.051596745103597644,
|
|
"signal/format_reward/group_zero_std_frac": 0.75,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01156005859375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01156005859375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036453432869166134,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005177795048803091,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5566792687168344e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5566792687168344e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.11404286623001099,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.14420087337493898,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.01140428688377142,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.01140428688377142,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21047287285327912,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3506768882274628,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.025,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.021047287806868552,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021047287806868552,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.7548298680504103,
|
|
"calibration/batch_distribution_entropy": 0.4567463163976795,
|
|
"calibration/buffer_distribution_entropy": 0.7713618183929173,
|
|
"calibration/confidence_entropy": 0.12036518782927541,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.030528375733855185,
|
|
"calibration/coverage@30%": 0.04031311154598825,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.14627281235381873,
|
|
"calibration/mean_confidence": 0.15631772699737523,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0021484375,
|
|
"completions/max_length": 1229.4,
|
|
"completions/max_terminated_length": 1229.4,
|
|
"completions/mean_length": 113.3099609375,
|
|
"completions/mean_terminated_length": 113.5542236328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 17.4,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.004159490577876568,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0045,
|
|
"num_tokens": 99586375.0,
|
|
"reward": 0.9887324690818786,
|
|
"reward_std": 0.2425031691789627,
|
|
"rewards/accuracy_reward": 0.21640625,
|
|
"rewards/batch_coverage_0": 0.5490703642368316,
|
|
"rewards/batch_coverage_1": 0.5490703642368316,
|
|
"rewards/batch_coverage_10": 0.5690686941146851,
|
|
"rewards/batch_coverage_15": 0.5694445013999939,
|
|
"rewards/batch_coverage_20": 0.5644742488861084,
|
|
"rewards/batch_coverage_25": 0.5636660337448121,
|
|
"rewards/batch_coverage_5": 0.557263171672821,
|
|
"rewards/brier_reward": 0.8261630177497864,
|
|
"rewards/confidence_uniqueness_reward": -0.055358816683292386,
|
|
"rewards/format_reward": 0.993359375,
|
|
"rewards/frontier_aurc_reward": -0.0052281500771641735,
|
|
"rewards/frontier_ece_reward": 0.006546266423538327,
|
|
"rewards/frontier_entropy_batch_reward": -0.8602579355239868,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1811279296875,
|
|
"signal/accuracy_reward/group_std_mean": 0.2211446762084961,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09056396484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09056396484375,
|
|
"signal/advantage_abs_mean": 0.1823619097471237,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1823619097471237,
|
|
"signal/advantage_pre_scale_std": 0.2679218739271164,
|
|
"signal/advantage_std": 0.2679218739271164,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.2666642487049103,
|
|
"signal/batch_coverage_0/group_std_mean": 0.34697710871696474,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.059375,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.026666425913572312,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.026666425913572312,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.2666642487049103,
|
|
"signal/batch_coverage_1/group_std_mean": 0.34697710871696474,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.059375,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.026666425913572312,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.026666425913572312,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.26514615416526793,
|
|
"signal/batch_coverage_10/group_std_mean": 0.3440410554409027,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.059375,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.02651461660861969,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.02651461660861969,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.2547744870185852,
|
|
"signal/batch_coverage_15/group_std_mean": 0.33346186876296996,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.059375,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.025477449968457223,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.025477449968457223,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.24541459679603578,
|
|
"signal/batch_coverage_20/group_std_mean": 0.3252484619617462,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.059375,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.024541460536420347,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.024541460536420347,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.24044472873210906,
|
|
"signal/batch_coverage_25/group_std_mean": 0.32090034484863283,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.059375,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.024044474214315416,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.024044474214315416,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.2699785053730011,
|
|
"signal/batch_coverage_5/group_std_mean": 0.35016059279441836,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.059375,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.026997851207852364,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.026997851207852364,
|
|
"signal/brier_reward/centered_abs_mean": 0.20681797564029694,
|
|
"signal/brier_reward/group_std_mean": 0.27285282015800477,
|
|
"signal/brier_reward/group_zero_std_frac": 0.053125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020681798458099365,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020681798458099365,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.37839528918266296,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.47339463233947754,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.065625,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03783952966332436,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03783952966332436,
|
|
"signal/format_reward/centered_abs_mean": 0.01219482421875,
|
|
"signal/format_reward/group_std_mean": 0.031007519736886025,
|
|
"signal/format_reward/group_zero_std_frac": 0.8375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006097412109375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.006097412109375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016961080371402204,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0027939900755882263,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1201351046329364e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1201351046329364e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04961966909468174,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07686802893877029,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.053125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0049619670957326886,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0049619670957326886,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22081681489944457,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33247880935668944,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.125,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022081680968403815,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022081680968403815,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.7739565546771938,
|
|
"calibration/batch_distribution_entropy": 0.2388587210176225,
|
|
"calibration/buffer_distribution_entropy": 0.7697787894619414,
|
|
"calibration/confidence_entropy": 0.04604423048437065,
|
|
"calibration/coverage@0%": 0.01215686274509804,
|
|
"calibration/coverage@1%": 0.01215686274509804,
|
|
"calibration/coverage@10%": 0.027936138882430444,
|
|
"calibration/coverage@15%": 0.030681236921646128,
|
|
"calibration/coverage@20%": 0.05222619536742088,
|
|
"calibration/coverage@25%": 0.06124119862899948,
|
|
"calibration/coverage@30%": 0.07420732719965457,
|
|
"calibration/coverage@5%": 0.012549019607843137,
|
|
"calibration/ece": 0.1059488157666342,
|
|
"calibration/mean_confidence": 0.06527309548961124,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00244140625,
|
|
"completions/max_length": 767.8,
|
|
"completions/max_terminated_length": 767.8,
|
|
"completions/mean_length": 117.16904296875,
|
|
"completions/mean_terminated_length": 117.45823516845704,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 19.2,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.01868264377117157,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0063,
|
|
"num_tokens": 115895658.0,
|
|
"reward": 0.9797860622406006,
|
|
"reward_std": 0.16158705055713654,
|
|
"rewards/accuracy_reward": 0.1146484375,
|
|
"rewards/batch_coverage_0": 0.6942908763885498,
|
|
"rewards/batch_coverage_1": 0.6942908763885498,
|
|
"rewards/batch_coverage_10": 0.699905002117157,
|
|
"rewards/batch_coverage_15": 0.7006630778312684,
|
|
"rewards/batch_coverage_20": 0.6973462700843811,
|
|
"rewards/batch_coverage_25": 0.6931140184402466,
|
|
"rewards/batch_coverage_5": 0.6949953079223633,
|
|
"rewards/brier_reward": 0.910808777809143,
|
|
"rewards/confidence_uniqueness_reward": -0.6458814024925232,
|
|
"rewards/format_reward": 0.995703125,
|
|
"rewards/frontier_aurc_reward": -0.005381030216813088,
|
|
"rewards/frontier_ece_reward": 0.008919767942279578,
|
|
"rewards/frontier_entropy_batch_reward": -0.9016774773597718,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1273193359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.16958228945732118,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.515625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06365966796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06365966796875,
|
|
"signal/advantage_abs_mean": 0.10710721462965012,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10710721462965012,
|
|
"signal/advantage_pre_scale_std": 0.21973832249641417,
|
|
"signal/advantage_std": 0.21973832249641417,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.13674386590719223,
|
|
"signal/batch_coverage_0/group_std_mean": 0.20728526413440704,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.278125,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013674386776983738,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.013674386776983738,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.13674386590719223,
|
|
"signal/batch_coverage_1/group_std_mean": 0.20728526413440704,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.278125,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013674386776983738,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.013674386776983738,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.13614047914743424,
|
|
"signal/batch_coverage_10/group_std_mean": 0.20697369873523713,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.278125,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013614048063755036,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.013614048063755036,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.13347171396017074,
|
|
"signal/batch_coverage_15/group_std_mean": 0.20461286902427672,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.278125,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013347171433269977,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.013347171433269977,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.13021294325590133,
|
|
"signal/batch_coverage_20/group_std_mean": 0.2026127427816391,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.278125,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013021294586360454,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.013021294586360454,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.12717701941728593,
|
|
"signal/batch_coverage_25/group_std_mean": 0.1998313844203949,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.278125,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012717702239751816,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.012717702239751816,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.13720745891332625,
|
|
"signal/batch_coverage_5/group_std_mean": 0.20782139897346497,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.278125,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013720746710896492,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.013720746710896492,
|
|
"signal/brier_reward/centered_abs_mean": 0.12075443416833878,
|
|
"signal/brier_reward/group_std_mean": 0.175453719496727,
|
|
"signal/brier_reward/group_zero_std_frac": 0.278125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012075443752110004,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012075443752110004,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.22815002501010895,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.3402702987194061,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.334375,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022815002501010893,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022815002501010893,
|
|
"signal/format_reward/centered_abs_mean": 0.00767822265625,
|
|
"signal/format_reward/group_std_mean": 0.01842188462615013,
|
|
"signal/format_reward/group_zero_std_frac": 0.90625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003839111328125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.003839111328125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0008029210846871137,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.001404245011508465,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.05,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.003651377686765e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.003651377686765e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01746743656694889,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03470666408538818,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.275,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017467437544837594,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017467437544837594,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.14744934737682341,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.23726414144039154,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.35625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014744934998452664,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014744934998452664,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.8121248519963785,
|
|
"calibration/batch_distribution_entropy": 0.22457522420408763,
|
|
"calibration/buffer_distribution_entropy": 0.7391312668108464,
|
|
"calibration/confidence_entropy": 0.05101885456238644,
|
|
"calibration/coverage@0%": 0.0027497207134327207,
|
|
"calibration/coverage@1%": 0.0027497207134327207,
|
|
"calibration/coverage@10%": 0.018466813051350207,
|
|
"calibration/coverage@15%": 0.04158185770742347,
|
|
"calibration/coverage@20%": 0.06825324052704215,
|
|
"calibration/coverage@25%": 0.08276386917228877,
|
|
"calibration/coverage@30%": 0.09334061110516817,
|
|
"calibration/coverage@5%": 0.010608266882391464,
|
|
"calibration/ece": 0.06685038780857414,
|
|
"calibration/mean_confidence": 0.05442588489874869,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00224609375,
|
|
"completions/max_length": 450.6,
|
|
"completions/max_terminated_length": 450.6,
|
|
"completions/mean_length": 130.52333984375,
|
|
"completions/mean_terminated_length": 130.82366027832032,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 42.2,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.010998404584825039,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0078,
|
|
"num_tokens": 132148889.0,
|
|
"reward": 0.9904319882392884,
|
|
"reward_std": 0.1573132574558258,
|
|
"rewards/accuracy_reward": 0.11435546875,
|
|
"rewards/batch_coverage_0": 0.6873297572135926,
|
|
"rewards/batch_coverage_1": 0.6873297572135926,
|
|
"rewards/batch_coverage_10": 0.6906278967857361,
|
|
"rewards/batch_coverage_15": 0.6921732783317566,
|
|
"rewards/batch_coverage_20": 0.6892393708229065,
|
|
"rewards/batch_coverage_25": 0.6890079140663147,
|
|
"rewards/batch_coverage_5": 0.689215111732483,
|
|
"rewards/brier_reward": 0.9234801173210144,
|
|
"rewards/confidence_uniqueness_reward": -0.5425438076257706,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/frontier_aurc_reward": -0.0053546403534710406,
|
|
"rewards/frontier_ece_reward": 0.007387215364724397,
|
|
"rewards/frontier_entropy_batch_reward": -0.8605040788650513,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.117291259765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.15457678139209746,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.565625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0586456298828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0586456298828125,
|
|
"signal/advantage_abs_mean": 0.10570785701274872,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10570785701274872,
|
|
"signal/advantage_pre_scale_std": 0.21785781383514405,
|
|
"signal/advantage_std": 0.21785781383514405,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.10327832996845246,
|
|
"signal/batch_coverage_0/group_std_mean": 0.16254315078258513,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.25,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.010327833332121372,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.010327833332121372,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.10327832996845246,
|
|
"signal/batch_coverage_1/group_std_mean": 0.16254315078258513,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.25,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.010327833332121372,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.010327833332121372,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.10503756999969482,
|
|
"signal/batch_coverage_10/group_std_mean": 0.16518883407115936,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.25,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.010503756627440453,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.010503756627440453,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.10226047039031982,
|
|
"signal/batch_coverage_15/group_std_mean": 0.16291693598031998,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.25,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.010226047411561013,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.010226047411561013,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.10146261975169182,
|
|
"signal/batch_coverage_20/group_std_mean": 0.1630474865436554,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.25,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.010146262310445308,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.010146262310445308,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.09983718618750573,
|
|
"signal/batch_coverage_25/group_std_mean": 0.1619284689426422,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.25,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.009983718767762184,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.009983718767762184,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.10402362942695617,
|
|
"signal/batch_coverage_5/group_std_mean": 0.16306559443473817,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.25,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.010402363166213036,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.010402363166213036,
|
|
"signal/brier_reward/centered_abs_mean": 0.10254615992307663,
|
|
"signal/brier_reward/group_std_mean": 0.1524132326245308,
|
|
"signal/brier_reward/group_zero_std_frac": 0.259375,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010254616104066371,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010254616104066371,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2588571161031723,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.36650630831718445,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.3125,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02588571347296238,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02588571347296238,
|
|
"signal/format_reward/centered_abs_mean": 0.00748291015625,
|
|
"signal/format_reward/group_std_mean": 0.020297119021415712,
|
|
"signal/format_reward/group_zero_std_frac": 0.890625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003741455078125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.003741455078125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0009247717563994229,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0014879585476592183,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.121875,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.155964746430982e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.155964746430982e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.019005920551717282,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0374838687479496,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.271875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019005921203643084,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019005921203643084,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1939413219690323,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2823514461517334,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.309375,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01939413193613291,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01939413193613291,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.50818107219839,
|
|
"calibration/batch_distribution_entropy": 0.5765797611568575,
|
|
"calibration/buffer_distribution_entropy": 0.7184762427451847,
|
|
"calibration/confidence_entropy": 0.15950569358905747,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.09775760184714125,
|
|
"calibration/coverage@15%": 0.15666653621883925,
|
|
"calibration/coverage@20%": 0.21749429764877912,
|
|
"calibration/coverage@25%": 0.24952578011709087,
|
|
"calibration/coverage@30%": 0.31821007469375323,
|
|
"calibration/coverage@5%": 0.014624505928853754,
|
|
"calibration/ece": 0.1193030803702988,
|
|
"calibration/mean_confidence": 0.20761059397605647,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00859375,
|
|
"completions/max_length": 610.6,
|
|
"completions/max_terminated_length": 610.6,
|
|
"completions/mean_length": 150.23427734375,
|
|
"completions/mean_terminated_length": 151.54664611816406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 56.4,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.007136950735002756,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.009,
|
|
"num_tokens": 148637720.0,
|
|
"reward": 1.0417076230049134,
|
|
"reward_std": 0.2498043328523636,
|
|
"rewards/accuracy_reward": 0.27431640625,
|
|
"rewards/batch_coverage_0": 0.5050425410270691,
|
|
"rewards/batch_coverage_1": 0.5050425410270691,
|
|
"rewards/batch_coverage_10": 0.5385935366153717,
|
|
"rewards/batch_coverage_15": 0.535876190662384,
|
|
"rewards/batch_coverage_20": 0.5360523641109467,
|
|
"rewards/batch_coverage_25": 0.5326862096786499,
|
|
"rewards/batch_coverage_5": 0.5235825717449188,
|
|
"rewards/brier_reward": 0.8559035062789917,
|
|
"rewards/confidence_uniqueness_reward": 0.2509185492992401,
|
|
"rewards/format_reward": 0.98984375,
|
|
"rewards/frontier_aurc_reward": -0.0038861197885125876,
|
|
"rewards/frontier_ece_reward": 0.01809915155172348,
|
|
"rewards/frontier_entropy_batch_reward": -0.7050358533859253,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.185162353515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.2300116926431656,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0925811767578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0925811767578125,
|
|
"signal/advantage_abs_mean": 0.18442415297031403,
|
|
"signal/advantage_pre_scale_abs_mean": 0.18442415297031403,
|
|
"signal/advantage_pre_scale_std": 0.28187933266162873,
|
|
"signal/advantage_std": 0.28187933266162873,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.19961527585983277,
|
|
"signal/batch_coverage_0/group_std_mean": 0.2660757750272751,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.03125,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01996152810752392,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.01996152810752392,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.19961527585983277,
|
|
"signal/batch_coverage_1/group_std_mean": 0.2660757750272751,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.03125,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01996152810752392,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.01996152810752392,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.20459548830986024,
|
|
"signal/batch_coverage_10/group_std_mean": 0.2712189704179764,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.03125,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.020459549874067305,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.020459549874067305,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.19978629052639008,
|
|
"signal/batch_coverage_15/group_std_mean": 0.26815671324729917,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.03125,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01997862905263901,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.01997862905263901,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.19783964157104492,
|
|
"signal/batch_coverage_20/group_std_mean": 0.26660596430301664,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.03125,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.019783964194357396,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.019783964194357396,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.19625370502471923,
|
|
"signal/batch_coverage_25/group_std_mean": 0.2650555014610291,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.03125,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.019625371135771275,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.019625371135771275,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.20749586820602417,
|
|
"signal/batch_coverage_5/group_std_mean": 0.27330430746078493,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.03125,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.02074958700686693,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.02074958700686693,
|
|
"signal/brier_reward/centered_abs_mean": 0.16967381834983825,
|
|
"signal/brier_reward/group_std_mean": 0.23079933822155,
|
|
"signal/brier_reward/group_zero_std_frac": 0.03125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016967381909489633,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016967381909489633,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.3156610667705536,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.3933307945728302,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.034375,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.031566106528043744,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.031566106528043744,
|
|
"signal/format_reward/centered_abs_mean": 0.01905517578125,
|
|
"signal/format_reward/group_std_mean": 0.04808509647846222,
|
|
"signal/format_reward/group_zero_std_frac": 0.753125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009527587890625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009527587890625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002761203283444047,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003953706519678235,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4515042352722956e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4515042352722956e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05492620766162872,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08251123428344727,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.034375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005492620915174484,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005492620915174484,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35702455043792725,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4454483091831207,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.05,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03570245616137981,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03570245616137981,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3948438466039467,
|
|
"calibration/batch_distribution_entropy": 0.7543689498325424,
|
|
"calibration/buffer_distribution_entropy": 0.7218805161850744,
|
|
"calibration/confidence_entropy": 0.2475598733219218,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.029306930693069305,
|
|
"calibration/coverage@20%": 0.04554455445544554,
|
|
"calibration/coverage@25%": 0.23985486146123733,
|
|
"calibration/coverage@30%": 0.38845830147412347,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.1511983118975085,
|
|
"calibration/mean_confidence": 0.3666756006103048,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.008984375,
|
|
"completions/max_length": 529.8,
|
|
"completions/max_terminated_length": 529.8,
|
|
"completions/mean_length": 166.7224609375,
|
|
"completions/mean_terminated_length": 168.22821655273438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 68.0,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.002798353089019656,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0092,
|
|
"num_tokens": 165365886.0,
|
|
"reward": 1.117294979095459,
|
|
"reward_std": 0.26995018124580383,
|
|
"rewards/accuracy_reward": 0.3697265625,
|
|
"rewards/batch_coverage_0": 0.4271726250648499,
|
|
"rewards/batch_coverage_1": 0.4271726250648499,
|
|
"rewards/batch_coverage_10": 0.49650421142578127,
|
|
"rewards/batch_coverage_15": 0.5068722724914551,
|
|
"rewards/batch_coverage_20": 0.5134539842605591,
|
|
"rewards/batch_coverage_25": 0.5107719004154205,
|
|
"rewards/batch_coverage_5": 0.4713205575942993,
|
|
"rewards/brier_reward": 0.8003980636596679,
|
|
"rewards/confidence_uniqueness_reward": 0.7806328177452088,
|
|
"rewards/format_reward": 0.990625,
|
|
"rewards/frontier_aurc_reward": -0.00399963753297925,
|
|
"rewards/frontier_ece_reward": 0.01983025260269642,
|
|
"rewards/frontier_entropy_batch_reward": -0.5824374079704284,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16785888671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.21611510515213012,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.403125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.083929443359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.083929443359375,
|
|
"signal/advantage_abs_mean": 0.20416966676712037,
|
|
"signal/advantage_pre_scale_abs_mean": 0.20416966676712037,
|
|
"signal/advantage_pre_scale_std": 0.28179879784584044,
|
|
"signal/advantage_std": 0.28179879784584044,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.24080052077770234,
|
|
"signal/batch_coverage_0/group_std_mean": 0.3102306604385376,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.024080053344368933,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.024080053344368933,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.24080052077770234,
|
|
"signal/batch_coverage_1/group_std_mean": 0.3102306604385376,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.024080053344368933,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.024080053344368933,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.2546322673559189,
|
|
"signal/batch_coverage_10/group_std_mean": 0.32426918745040895,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.025463227182626724,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.025463227182626724,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.25954718291759493,
|
|
"signal/batch_coverage_15/group_std_mean": 0.32985265254974366,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.02595471851527691,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.02595471851527691,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.2642736345529556,
|
|
"signal/batch_coverage_20/group_std_mean": 0.3358638346195221,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.026427363604307176,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.026427363604307176,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.2585195034742355,
|
|
"signal/batch_coverage_25/group_std_mean": 0.3294057846069336,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.02585195079445839,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.02585195079445839,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.2501774102449417,
|
|
"signal/batch_coverage_5/group_std_mean": 0.3195813298225403,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.025017741695046426,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.025017741695046426,
|
|
"signal/brier_reward/centered_abs_mean": 0.20010380446910858,
|
|
"signal/brier_reward/group_std_mean": 0.2640431046485901,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020010380446910857,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020010380446910857,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.14535830169916153,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1824635833501816,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014535830728709698,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014535830728709698,
|
|
"signal/format_reward/centered_abs_mean": 0.01787109375,
|
|
"signal/format_reward/group_std_mean": 0.04609687626361847,
|
|
"signal/format_reward/group_zero_std_frac": 0.7625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008935546875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008935546875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00380462440662086,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005796490237116814,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.755780755658634e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.755780755658634e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.07341517955064773,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.101849465072155,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007341517694294452,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007341517694294452,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4398982286453247,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5136456608772277,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.043989823758602144,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.043989823758602144,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"eval_calibration/aurc": 0.5988549197356936,
|
|
"eval_calibration/batch_distribution_entropy": 0.6807624415706247,
|
|
"eval_calibration/buffer_distribution_entropy": 0.7286592836154312,
|
|
"eval_calibration/confidence_entropy": 0.259052853462329,
|
|
"eval_calibration/coverage@0%": 0.0390625,
|
|
"eval_calibration/coverage@1%": 0.0390625,
|
|
"eval_calibration/coverage@10%": 0.0390625,
|
|
"eval_calibration/coverage@15%": 0.0390625,
|
|
"eval_calibration/coverage@20%": 0.078125,
|
|
"eval_calibration/coverage@25%": 0.078125,
|
|
"eval_calibration/coverage@30%": 0.0859375,
|
|
"eval_calibration/coverage@5%": 0.0390625,
|
|
"eval_calibration/ece": 0.2969021597053368,
|
|
"eval_calibration/mean_confidence": 0.4876565724405962,
|
|
"eval_completions/clipped_ratio": 0.00390625,
|
|
"eval_completions/max_length": 358.5,
|
|
"eval_completions/max_terminated_length": 358.5,
|
|
"eval_completions/mean_length": 174.25215530395508,
|
|
"eval_completions/mean_terminated_length": 174.94443893432617,
|
|
"eval_completions/min_length": 45.75,
|
|
"eval_completions/min_terminated_length": 94.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 165365886.0,
|
|
"eval_reward": 0.9293602705001831,
|
|
"eval_reward_std": 0.29658710211515427,
|
|
"eval_rewards/accuracy_reward": 0.3125,
|
|
"eval_rewards/batch_coverage_0": 0.3174902945756912,
|
|
"eval_rewards/batch_coverage_1": 0.3174902945756912,
|
|
"eval_rewards/batch_coverage_10": 0.3108842074871063,
|
|
"eval_rewards/batch_coverage_15": 0.30682356655597687,
|
|
"eval_rewards/batch_coverage_20": 0.29854603856801987,
|
|
"eval_rewards/batch_coverage_25": 0.29226575046777725,
|
|
"eval_rewards/batch_coverage_5": 0.3174902945756912,
|
|
"eval_rewards/brier_reward": 0.7489841133356094,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8224703967571259,
|
|
"eval_rewards/format_reward": 0.99609375,
|
|
"eval_rewards/frontier_aurc_reward": -0.005349113023839891,
|
|
"eval_rewards/frontier_ece_reward": 0.014951157238101587,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.99609375,
|
|
"eval_runtime": 37.8213,
|
|
"eval_samples_per_second": 13.22,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.416015625,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.46149376779794693,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2080078125,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2080078125,
|
|
"eval_signal/advantage_abs_mean": 0.25766993314027786,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.25766993314027786,
|
|
"eval_signal/advantage_pre_scale_std": 0.29480960220098495,
|
|
"eval_signal/advantage_std": 0.29480960220098495,
|
|
"eval_signal/batch_coverage_0/centered_abs_mean": 0.4219600185751915,
|
|
"eval_signal/batch_coverage_0/group_std_mean": 0.48065635561943054,
|
|
"eval_signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04219600185751915,
|
|
"eval_signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.04219600185751915,
|
|
"eval_signal/batch_coverage_1/centered_abs_mean": 0.4219600185751915,
|
|
"eval_signal/batch_coverage_1/group_std_mean": 0.48065635561943054,
|
|
"eval_signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04219600185751915,
|
|
"eval_signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.04219600185751915,
|
|
"eval_signal/batch_coverage_10/centered_abs_mean": 0.41396255046129227,
|
|
"eval_signal/batch_coverage_10/group_std_mean": 0.4719335660338402,
|
|
"eval_signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04139625560492277,
|
|
"eval_signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.04139625560492277,
|
|
"eval_signal/batch_coverage_15/centered_abs_mean": 0.4079297110438347,
|
|
"eval_signal/batch_coverage_15/group_std_mean": 0.46547579765319824,
|
|
"eval_signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04079297184944153,
|
|
"eval_signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.04079297184944153,
|
|
"eval_signal/batch_coverage_20/centered_abs_mean": 0.3940034508705139,
|
|
"eval_signal/batch_coverage_20/group_std_mean": 0.4500989094376564,
|
|
"eval_signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.039400345645844936,
|
|
"eval_signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.039400345645844936,
|
|
"eval_signal/batch_coverage_25/centered_abs_mean": 0.3857416883111,
|
|
"eval_signal/batch_coverage_25/group_std_mean": 0.4417581185698509,
|
|
"eval_signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03857417032122612,
|
|
"eval_signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.03857417032122612,
|
|
"eval_signal/batch_coverage_5/centered_abs_mean": 0.4219600185751915,
|
|
"eval_signal/batch_coverage_5/group_std_mean": 0.48065635561943054,
|
|
"eval_signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04219600185751915,
|
|
"eval_signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.04219600185751915,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.3073003217577934,
|
|
"eval_signal/brier_reward/group_std_mean": 0.3613938093185425,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030730033293366432,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.030730033293366432,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.10804207064211369,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.12843790277838707,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010804207297042012,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010804207297042012,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.007568359375,
|
|
"eval_signal/format_reward/group_std_mean": 0.022097086533904076,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.875,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0037841796875,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0037841796875,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005103390081785619,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.006918843020685017,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.379238038789481e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.379238038789481e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0694188978523016,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.10465933568775654,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006941889994777739,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006941889994777739,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.007568359375,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.022097086533904076,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.875,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0007568359724245965,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0007568359724245965,
|
|
"eval_steps_per_second": 0.106,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4378798248603018,
|
|
"calibration/batch_distribution_entropy": 0.7893052997827892,
|
|
"calibration/buffer_distribution_entropy": 0.7332999397532869,
|
|
"calibration/confidence_entropy": 0.2882561287184676,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.05731225296442688,
|
|
"calibration/coverage@30%": 0.11750360482656066,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.21578943909506018,
|
|
"calibration/mean_confidence": 0.5581830455108985,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00439453125,
|
|
"completions/max_length": 775.8,
|
|
"completions/max_terminated_length": 775.8,
|
|
"completions/mean_length": 175.36943359375,
|
|
"completions/mean_terminated_length": 176.1399139404297,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 75.2,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.0023718716111034155,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0052,
|
|
"num_tokens": 182398789.0,
|
|
"reward": 1.0906760931015014,
|
|
"reward_std": 0.24937661588191987,
|
|
"rewards/accuracy_reward": 0.38095703125,
|
|
"rewards/batch_coverage_0": 0.35660014152526853,
|
|
"rewards/batch_coverage_1": 0.35660014152526853,
|
|
"rewards/batch_coverage_10": 0.4325276672840118,
|
|
"rewards/batch_coverage_15": 0.4449883341789246,
|
|
"rewards/batch_coverage_20": 0.4588911831378937,
|
|
"rewards/batch_coverage_25": 0.463850337266922,
|
|
"rewards/batch_coverage_5": 0.3945730209350586,
|
|
"rewards/brier_reward": 0.7669905066490174,
|
|
"rewards/confidence_uniqueness_reward": 0.8827269434928894,
|
|
"rewards/format_reward": 0.9951171875,
|
|
"rewards/frontier_aurc_reward": -0.004561876039952039,
|
|
"rewards/frontier_ece_reward": 0.014202152192592622,
|
|
"rewards/frontier_entropy_batch_reward": -0.5449908971786499,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.173187255859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.21781871914863588,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.421875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0865936279296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0865936279296875,
|
|
"signal/advantage_abs_mean": 0.19311022162437438,
|
|
"signal/advantage_pre_scale_abs_mean": 0.19311022162437438,
|
|
"signal/advantage_pre_scale_std": 0.262198868393898,
|
|
"signal/advantage_std": 0.262198868393898,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.20709326565265657,
|
|
"signal/batch_coverage_0/group_std_mean": 0.2733163952827454,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.02070932649075985,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.02070932649075985,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.20709326565265657,
|
|
"signal/batch_coverage_1/group_std_mean": 0.2733163952827454,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.02070932649075985,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.02070932649075985,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.2269432097673416,
|
|
"signal/batch_coverage_10/group_std_mean": 0.2966294169425964,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.02269432060420513,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.02269432060420513,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.23294417560100555,
|
|
"signal/batch_coverage_15/group_std_mean": 0.30323171615600586,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.023294418677687646,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.023294418677687646,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.24196192026138305,
|
|
"signal/batch_coverage_20/group_std_mean": 0.313522207736969,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.024196192249655725,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.024196192249655725,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.24587258994579314,
|
|
"signal/batch_coverage_25/group_std_mean": 0.3179736495018005,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.02458725869655609,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.02458725869655609,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.21568080186843872,
|
|
"signal/batch_coverage_5/group_std_mean": 0.28273540139198305,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.02156808041036129,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.02156808041036129,
|
|
"signal/brier_reward/centered_abs_mean": 0.20453362464904784,
|
|
"signal/brier_reward/group_std_mean": 0.2597156286239624,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020453362539410592,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020453362539410592,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08080095127224922,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10371316969394684,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00808009523898363,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00808009523898363,
|
|
"signal/format_reward/centered_abs_mean": 0.00933837890625,
|
|
"signal/format_reward/group_std_mean": 0.02481246441602707,
|
|
"signal/format_reward/group_zero_std_frac": 0.86875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004669189453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.004669189453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0035921338479965926,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005274048540741205,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.490167411859147e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.490167411859147e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06365836337208748,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08838188350200653,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006365836411714554,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006365836411714554,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4541351854801178,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5231335401535034,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04541352093219757,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04541352093219757,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3748090180300953,
|
|
"calibration/batch_distribution_entropy": 0.7953933340069766,
|
|
"calibration/buffer_distribution_entropy": 0.7446203141187778,
|
|
"calibration/confidence_entropy": 0.3067337279445185,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.007827788649706457,
|
|
"calibration/coverage@15%": 0.022309197651663403,
|
|
"calibration/coverage@20%": 0.10722770914872799,
|
|
"calibration/coverage@25%": 0.18578155577299413,
|
|
"calibration/coverage@30%": 0.3459637964774951,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.15640222008453614,
|
|
"calibration/mean_confidence": 0.5451544081436507,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00107421875,
|
|
"completions/max_length": 566.4,
|
|
"completions/max_terminated_length": 566.4,
|
|
"completions/mean_length": 182.34873046875,
|
|
"completions/mean_terminated_length": 182.54359436035156,
|
|
"completions/min_length": 17.6,
|
|
"completions/min_terminated_length": 79.8,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.002586693037301302,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0007,
|
|
"num_tokens": 199080856.0,
|
|
"reward": 1.143661379814148,
|
|
"reward_std": 0.23088187277317046,
|
|
"rewards/accuracy_reward": 0.41591796875,
|
|
"rewards/batch_coverage_0": 0.3952768325805664,
|
|
"rewards/batch_coverage_1": 0.3952768325805664,
|
|
"rewards/batch_coverage_10": 0.47167777419090273,
|
|
"rewards/batch_coverage_15": 0.4818853437900543,
|
|
"rewards/batch_coverage_20": 0.48628310561180116,
|
|
"rewards/batch_coverage_25": 0.49131479263305666,
|
|
"rewards/batch_coverage_5": 0.44672444462776184,
|
|
"rewards/brier_reward": 0.7825680017471314,
|
|
"rewards/confidence_uniqueness_reward": 0.911488664150238,
|
|
"rewards/format_reward": 0.9984375,
|
|
"rewards/frontier_aurc_reward": -0.00404461151920259,
|
|
"rewards/frontier_ece_reward": 0.01575819170102477,
|
|
"rewards/frontier_entropy_batch_reward": -0.512912106513977,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.144512939453125,
|
|
"signal/accuracy_reward/group_std_mean": 0.19226216971874238,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.446875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0722564697265625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0722564697265625,
|
|
"signal/advantage_abs_mean": 0.1781569391489029,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1781569391489029,
|
|
"signal/advantage_pre_scale_std": 0.24461511373519898,
|
|
"signal/advantage_std": 0.24461511373519898,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.19959236383438111,
|
|
"signal/batch_coverage_0/group_std_mean": 0.25877436995506287,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.019959235936403273,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.019959235936403273,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.19959236383438111,
|
|
"signal/batch_coverage_1/group_std_mean": 0.25877436995506287,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.019959235936403273,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.019959235936403273,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.22316555082798004,
|
|
"signal/batch_coverage_10/group_std_mean": 0.2864723980426788,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.022316556051373483,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.022316556051373483,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.22717729210853577,
|
|
"signal/batch_coverage_15/group_std_mean": 0.2909119248390198,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.022717729210853577,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.022717729210853577,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.22871865928173066,
|
|
"signal/batch_coverage_20/group_std_mean": 0.2934320390224457,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.02287186644971371,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.02287186644971371,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.23193451762199402,
|
|
"signal/batch_coverage_25/group_std_mean": 0.2979011297225952,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.0231934517621994,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0231934517621994,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.2138021856546402,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2738446056842804,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.021380218863487243,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.021380218863487243,
|
|
"signal/brier_reward/centered_abs_mean": 0.18481292128562926,
|
|
"signal/brier_reward/group_std_mean": 0.2386064797639847,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018481292203068732,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018481292203068732,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05636930540204048,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07167273461818695,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005636930651962757,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005636930651962757,
|
|
"signal/format_reward/centered_abs_mean": 0.00301513671875,
|
|
"signal/format_reward/group_std_mean": 0.008502526115626097,
|
|
"signal/format_reward/group_zero_std_frac": 0.953125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001507568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.001507568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027934785932302473,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0041437826585024595,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.49184814695036e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.49184814695036e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05286612808704376,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07452991306781769,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0052866128273308275,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0052866128273308275,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4506451427936554,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5177243947982788,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.045064514875411986,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.045064514875411986,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31193953094255866,
|
|
"calibration/batch_distribution_entropy": 0.809071369900361,
|
|
"calibration/buffer_distribution_entropy": 0.753271732079433,
|
|
"calibration/confidence_entropy": 0.3125224990978349,
|
|
"calibration/coverage@0%": 0.005870841487279843,
|
|
"calibration/coverage@1%": 0.005870841487279843,
|
|
"calibration/coverage@10%": 0.15184839774951076,
|
|
"calibration/coverage@15%": 0.176879739481409,
|
|
"calibration/coverage@20%": 0.19330815802348336,
|
|
"calibration/coverage@25%": 0.465513195004029,
|
|
"calibration/coverage@30%": 0.5644500253012164,
|
|
"calibration/coverage@5%": 0.01487279843444227,
|
|
"calibration/ece": 0.13437654194089554,
|
|
"calibration/mean_confidence": 0.52754010562185,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 764.4,
|
|
"completions/max_terminated_length": 764.4,
|
|
"completions/mean_length": 188.24453125,
|
|
"completions/mean_terminated_length": 188.33706665039062,
|
|
"completions/min_length": 17.4,
|
|
"completions/min_terminated_length": 81.2,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.0021228559780865908,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0002,
|
|
"num_tokens": 216040704.0,
|
|
"reward": 1.1614436149597167,
|
|
"reward_std": 0.22528802156448363,
|
|
"rewards/accuracy_reward": 0.45263671875,
|
|
"rewards/batch_coverage_0": 0.37199272513389586,
|
|
"rewards/batch_coverage_1": 0.37199272513389586,
|
|
"rewards/batch_coverage_10": 0.46476385593414304,
|
|
"rewards/batch_coverage_15": 0.47402478456497193,
|
|
"rewards/batch_coverage_20": 0.47720582485198976,
|
|
"rewards/batch_coverage_25": 0.4847422957420349,
|
|
"rewards/batch_coverage_5": 0.42998725175857544,
|
|
"rewards/brier_reward": 0.8034541726112365,
|
|
"rewards/confidence_uniqueness_reward": 0.9194929838180542,
|
|
"rewards/format_reward": 0.9990234375,
|
|
"rewards/frontier_aurc_reward": -0.0033801186364144087,
|
|
"rewards/frontier_ece_reward": 0.01981247924268246,
|
|
"rewards/frontier_entropy_batch_reward": -0.460911363363266,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.157464599609375,
|
|
"signal/accuracy_reward/group_std_mean": 0.20806764662265778,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.39375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0787322998046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0787322998046875,
|
|
"signal/advantage_abs_mean": 0.17317451536655426,
|
|
"signal/advantage_pre_scale_abs_mean": 0.17317451536655426,
|
|
"signal/advantage_pre_scale_std": 0.23805280029773712,
|
|
"signal/advantage_std": 0.23805280029773712,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.20832762718200684,
|
|
"signal/batch_coverage_0/group_std_mean": 0.26984869241714476,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.020832763239741325,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.020832763239741325,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.20832762718200684,
|
|
"signal/batch_coverage_1/group_std_mean": 0.26984869241714476,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.020832763239741325,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.020832763239741325,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.2282239854335785,
|
|
"signal/batch_coverage_10/group_std_mean": 0.2933039665222168,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.02282239906489849,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.02282239906489849,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.2308487683534622,
|
|
"signal/batch_coverage_15/group_std_mean": 0.296176677942276,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.02308487668633461,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.02308487668633461,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.22977923154830932,
|
|
"signal/batch_coverage_20/group_std_mean": 0.29552650451660156,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.022977923229336738,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.022977923229336738,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.23684509396553038,
|
|
"signal/batch_coverage_25/group_std_mean": 0.30428668260574343,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.023684510216116905,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.023684510216116905,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.21925785541534423,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2825684487819672,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.021925785392522813,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.021925785392522813,
|
|
"signal/brier_reward/centered_abs_mean": 0.1792667180299759,
|
|
"signal/brier_reward/group_std_mean": 0.2295171409845352,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017926672659814357,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017926672659814357,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.051846512407064435,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06691792458295823,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0051846509799361225,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0051846509799361225,
|
|
"signal/format_reward/centered_abs_mean": 0.0018798828125,
|
|
"signal/format_reward/group_std_mean": 0.005187963182106614,
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00093994140625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023344816174358128,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003417077288031578,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9181021091062576e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9181021091062576e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04670820683240891,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06903419941663742,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004670820478349924,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004670820478349924,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4435045063495636,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5081867694854736,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.044350451231002806,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.044350451231002806,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3552052316324844,
|
|
"calibration/batch_distribution_entropy": 0.8085204306568803,
|
|
"calibration/buffer_distribution_entropy": 0.7622269030192862,
|
|
"calibration/confidence_entropy": 0.30718985568153173,
|
|
"calibration/coverage@0%": 0.004705882352941176,
|
|
"calibration/coverage@1%": 0.004705882352941176,
|
|
"calibration/coverage@10%": 0.10938797436782932,
|
|
"calibration/coverage@15%": 0.16579870304286098,
|
|
"calibration/coverage@20%": 0.32641418211120066,
|
|
"calibration/coverage@25%": 0.41727178542649934,
|
|
"calibration/coverage@30%": 0.49365949119373775,
|
|
"calibration/coverage@5%": 0.02,
|
|
"calibration/ece": 0.13717859469131563,
|
|
"calibration/mean_confidence": 0.41138431787069046,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0021484375,
|
|
"completions/max_length": 635.8,
|
|
"completions/max_terminated_length": 635.8,
|
|
"completions/mean_length": 189.36298828125,
|
|
"completions/mean_terminated_length": 189.77273254394532,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 86.8,
|
|
"epoch": 0.224,
|
|
"grad_norm": 0.0016844564815983176,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0019,
|
|
"num_tokens": 233132965.0,
|
|
"reward": 1.1587292671203613,
|
|
"reward_std": 0.21179547905921936,
|
|
"rewards/accuracy_reward": 0.39658203125,
|
|
"rewards/batch_coverage_0": 0.43869972229003906,
|
|
"rewards/batch_coverage_1": 0.43869972229003906,
|
|
"rewards/batch_coverage_10": 0.5035058617591858,
|
|
"rewards/batch_coverage_15": 0.5092388391494751,
|
|
"rewards/batch_coverage_20": 0.5123389482498169,
|
|
"rewards/batch_coverage_25": 0.5107143759727478,
|
|
"rewards/batch_coverage_5": 0.4755396842956543,
|
|
"rewards/brier_reward": 0.8166243195533752,
|
|
"rewards/confidence_uniqueness_reward": 0.8880064725875855,
|
|
"rewards/format_reward": 0.9974609375,
|
|
"rewards/frontier_aurc_reward": -0.003564742440357804,
|
|
"rewards/frontier_ece_reward": 0.01565170958638191,
|
|
"rewards/frontier_entropy_batch_reward": -0.49149676561355593,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.147259521484375,
|
|
"signal/accuracy_reward/group_std_mean": 0.19043090045452118,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.475,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0736297607421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0736297607421875,
|
|
"signal/advantage_abs_mean": 0.16093077659606933,
|
|
"signal/advantage_pre_scale_abs_mean": 0.16093077659606933,
|
|
"signal/advantage_pre_scale_std": 0.22910359501838684,
|
|
"signal/advantage_std": 0.22910359501838684,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.19967867136001588,
|
|
"signal/batch_coverage_0/group_std_mean": 0.25975680351257324,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01996786817908287,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.01996786817908287,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.19967867136001588,
|
|
"signal/batch_coverage_1/group_std_mean": 0.25975680351257324,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01996786817908287,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.01996786817908287,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.21668496429920198,
|
|
"signal/batch_coverage_10/group_std_mean": 0.2810732364654541,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.02166849635541439,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.02166849635541439,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.21663637459278107,
|
|
"signal/batch_coverage_15/group_std_mean": 0.28133258819580076,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.021663638204336165,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.021663638204336165,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.21716900467872619,
|
|
"signal/batch_coverage_20/group_std_mean": 0.28218746185302734,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.021716900169849396,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.021716900169849396,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.21201648116111754,
|
|
"signal/batch_coverage_25/group_std_mean": 0.27759831547737124,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.02120164819061756,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.02120164819061756,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.20889467895030975,
|
|
"signal/batch_coverage_5/group_std_mean": 0.27032582759857177,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.02088946886360645,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.02088946886360645,
|
|
"signal/brier_reward/centered_abs_mean": 0.1668454587459564,
|
|
"signal/brier_reward/group_std_mean": 0.218032369017601,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01668454669415951,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01668454669415951,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07639306187629699,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09777989089488984,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0076393064111471174,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0076393064111471174,
|
|
"signal/format_reward/centered_abs_mean": 0.00484619140625,
|
|
"signal/format_reward/group_std_mean": 0.012622351385653019,
|
|
"signal/format_reward/group_zero_std_frac": 0.934375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.002423095703125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.002423095703125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002311847684904933,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003476549405604601,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8898097298224457e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8898097298224457e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.039065159112215045,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05721670910716057,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003906515752896666,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003906515752896666,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4242829144001007,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4961832225322723,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.042428291589021686,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.042428291589021686,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.39189889727631827,
|
|
"calibration/batch_distribution_entropy": 0.8065487656414344,
|
|
"calibration/buffer_distribution_entropy": 0.7683577524517988,
|
|
"calibration/confidence_entropy": 0.30581510958839314,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.145703125,
|
|
"calibration/coverage@15%": 0.1953125,
|
|
"calibration/coverage@20%": 0.253515625,
|
|
"calibration/coverage@25%": 0.291015625,
|
|
"calibration/coverage@30%": 0.346484375,
|
|
"calibration/coverage@5%": 0.094921875,
|
|
"calibration/ece": 0.16096194237733213,
|
|
"calibration/mean_confidence": 0.4557611454920851,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 727.8,
|
|
"completions/max_terminated_length": 727.8,
|
|
"completions/mean_length": 193.5591796875,
|
|
"completions/mean_terminated_length": 193.65261840820312,
|
|
"completions/min_length": 32.8,
|
|
"completions/min_terminated_length": 84.2,
|
|
"epoch": 0.24,
|
|
"grad_norm": 0.002140032360330224,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0015,
|
|
"num_tokens": 250366691.0,
|
|
"reward": 1.1608482837677,
|
|
"reward_std": 0.2237977057695389,
|
|
"rewards/accuracy_reward": 0.44482421875,
|
|
"rewards/batch_coverage_0": 0.40374218821525576,
|
|
"rewards/batch_coverage_1": 0.40374218821525576,
|
|
"rewards/batch_coverage_10": 0.4652402937412262,
|
|
"rewards/batch_coverage_15": 0.4752448081970215,
|
|
"rewards/batch_coverage_20": 0.48339633345603944,
|
|
"rewards/batch_coverage_25": 0.4878033041954041,
|
|
"rewards/batch_coverage_5": 0.43672565221786497,
|
|
"rewards/brier_reward": 0.8111422181129455,
|
|
"rewards/confidence_uniqueness_reward": 0.8884037733078003,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.0031261141411960125,
|
|
"rewards/frontier_ece_reward": 0.018053279910236596,
|
|
"rewards/frontier_entropy_batch_reward": -0.48581200242042544,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.176092529296875,
|
|
"signal/accuracy_reward/group_std_mean": 0.22773178815841674,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.371875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0880462646484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0880462646484375,
|
|
"signal/advantage_abs_mean": 0.17269106805324555,
|
|
"signal/advantage_pre_scale_abs_mean": 0.17269106805324555,
|
|
"signal/advantage_pre_scale_std": 0.2400616377592087,
|
|
"signal/advantage_std": 0.2400616377592087,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.21278543770313263,
|
|
"signal/batch_coverage_0/group_std_mean": 0.2748673528432846,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.021278544515371322,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.021278544515371322,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.21278543770313263,
|
|
"signal/batch_coverage_1/group_std_mean": 0.2748673528432846,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.021278544515371322,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.021278544515371322,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.2333226978778839,
|
|
"signal/batch_coverage_10/group_std_mean": 0.2996509253978729,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.023332270979881286,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.023332270979881286,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.23160504698753356,
|
|
"signal/batch_coverage_15/group_std_mean": 0.2971563279628754,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.023160504922270776,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.023160504922270776,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.23060874938964843,
|
|
"signal/batch_coverage_20/group_std_mean": 0.29717103838920594,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.02306087501347065,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.02306087501347065,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.23110208213329314,
|
|
"signal/batch_coverage_25/group_std_mean": 0.2988200902938843,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.02311020828783512,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.02311020828783512,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.22297326028347014,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2872433841228485,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.02229732573032379,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.02229732573032379,
|
|
"signal/brier_reward/centered_abs_mean": 0.17840671837329863,
|
|
"signal/brier_reward/group_std_mean": 0.2301701694726944,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01784067116677761,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01784067116677761,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07509338706731797,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09538438469171524,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0075093389488756655,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0075093389488756655,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_std_mean": 0.0033145629335194827,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025602192617952824,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003933770721778274,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.200274150003679e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.200274150003679e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.040749994665384294,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06029489189386368,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00407499959692359,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00407499959692359,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.43094528913497926,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5003817081451416,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04309452995657921,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04309452995657921,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.34841073189177924,
|
|
"calibration/batch_distribution_entropy": 0.7404371928462201,
|
|
"calibration/buffer_distribution_entropy": 0.7727618186889185,
|
|
"calibration/confidence_entropy": 0.26339871275418164,
|
|
"calibration/coverage@0%": 0.01796875,
|
|
"calibration/coverage@1%": 0.01796875,
|
|
"calibration/coverage@10%": 0.232421875,
|
|
"calibration/coverage@15%": 0.30234375,
|
|
"calibration/coverage@20%": 0.35078125,
|
|
"calibration/coverage@25%": 0.3921875,
|
|
"calibration/coverage@30%": 0.44260258683953035,
|
|
"calibration/coverage@5%": 0.061328125,
|
|
"calibration/ece": 0.1280873174322534,
|
|
"calibration/mean_confidence": 0.4740620152862841,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 793.2,
|
|
"completions/max_terminated_length": 793.2,
|
|
"completions/mean_length": 187.65595703125,
|
|
"completions/mean_terminated_length": 187.76578369140626,
|
|
"completions/min_length": 33.8,
|
|
"completions/min_terminated_length": 82.0,
|
|
"epoch": 0.256,
|
|
"grad_norm": 0.003614980261772871,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 267343104.0,
|
|
"reward": 1.175432276725769,
|
|
"reward_std": 0.20357095897197724,
|
|
"rewards/accuracy_reward": 0.4142578125,
|
|
"rewards/batch_coverage_0": 0.4698097288608551,
|
|
"rewards/batch_coverage_1": 0.4698097288608551,
|
|
"rewards/batch_coverage_10": 0.5149709939956665,
|
|
"rewards/batch_coverage_15": 0.5213613927364349,
|
|
"rewards/batch_coverage_20": 0.5291022181510925,
|
|
"rewards/batch_coverage_25": 0.5314548969268799,
|
|
"rewards/batch_coverage_5": 0.4935650169849396,
|
|
"rewards/brier_reward": 0.8144651532173157,
|
|
"rewards/confidence_uniqueness_reward": 0.8675598978996277,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.0034637684002518654,
|
|
"rewards/frontier_ece_reward": 0.020288588479161263,
|
|
"rewards/frontier_entropy_batch_reward": -0.5450147032737732,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14793701171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.18589730560779572,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.50625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.073968505859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.073968505859375,
|
|
"signal/advantage_abs_mean": 0.1570839136838913,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1570839136838913,
|
|
"signal/advantage_pre_scale_std": 0.22588954865932465,
|
|
"signal/advantage_std": 0.22588954865932465,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.19263360798358917,
|
|
"signal/batch_coverage_0/group_std_mean": 0.2533486902713776,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.019263360276818277,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.019263360276818277,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.19263360798358917,
|
|
"signal/batch_coverage_1/group_std_mean": 0.2533486902713776,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.019263360276818277,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.019263360276818277,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.20532942712306976,
|
|
"signal/batch_coverage_10/group_std_mean": 0.26716126799583434,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.020532942935824396,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.020532942935824396,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.20610760152339935,
|
|
"signal/batch_coverage_15/group_std_mean": 0.26746096909046174,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.020610759779810906,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.020610759779810906,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.20913188457489013,
|
|
"signal/batch_coverage_20/group_std_mean": 0.2716536849737167,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.02091318890452385,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.02091318890452385,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.2067886620759964,
|
|
"signal/batch_coverage_25/group_std_mean": 0.2700066208839417,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.020678867399692536,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.020678867399692536,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.19935680627822877,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2602789878845215,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01993568167090416,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.01993568167090416,
|
|
"signal/brier_reward/centered_abs_mean": 0.16666122376918793,
|
|
"signal/brier_reward/group_std_mean": 0.21535793244838713,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016666123270988466,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016666123270988466,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08296736031770706,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10238117724657059,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008296736143529415,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008296736143529415,
|
|
"signal/format_reward/centered_abs_mean": 0.001513671875,
|
|
"signal/format_reward/group_std_mean": 0.004419417260214687,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029713909141719343,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004420665092766285,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7142387009225784e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7142387009225784e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04267800748348236,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.060703708231449126,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004267800692468881,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004267800692468881,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.40921489596366883,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4904153048992157,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.040921490639448166,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.040921490639448166,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4095207420497604,
|
|
"calibration/batch_distribution_entropy": 0.7292892820522248,
|
|
"calibration/buffer_distribution_entropy": 0.7714704851416797,
|
|
"calibration/confidence_entropy": 0.24582674132048177,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.10450097847358122,
|
|
"calibration/coverage@20%": 0.17142857142857143,
|
|
"calibration/coverage@25%": 0.24810114970645794,
|
|
"calibration/coverage@30%": 0.2965852800880626,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.14765774260001646,
|
|
"calibration/mean_confidence": 0.4614050591762385,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00087890625,
|
|
"completions/max_length": 556.0,
|
|
"completions/max_terminated_length": 556.0,
|
|
"completions/mean_length": 194.88779296875,
|
|
"completions/mean_terminated_length": 195.06065979003907,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 80.4,
|
|
"epoch": 0.272,
|
|
"grad_norm": 0.002637905301526189,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0004,
|
|
"num_tokens": 284304451.0,
|
|
"reward": 1.1642091512680053,
|
|
"reward_std": 0.21782591938972473,
|
|
"rewards/accuracy_reward": 0.40166015625,
|
|
"rewards/batch_coverage_0": 0.45555404424667356,
|
|
"rewards/batch_coverage_1": 0.45555404424667356,
|
|
"rewards/batch_coverage_10": 0.5190391063690185,
|
|
"rewards/batch_coverage_15": 0.5268309950828552,
|
|
"rewards/batch_coverage_20": 0.5372787833213806,
|
|
"rewards/batch_coverage_25": 0.5389063596725464,
|
|
"rewards/batch_coverage_5": 0.4952739357948303,
|
|
"rewards/brier_reward": 0.8077268123626709,
|
|
"rewards/confidence_uniqueness_reward": 0.8441998481750488,
|
|
"rewards/format_reward": 0.998828125,
|
|
"rewards/frontier_aurc_reward": -0.003703146381303668,
|
|
"rewards/frontier_ece_reward": 0.018940356373786927,
|
|
"rewards/frontier_entropy_batch_reward": -0.5591914057731628,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.147406005859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1893194407224655,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.48125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0737030029296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0737030029296875,
|
|
"signal/advantage_abs_mean": 0.16743640899658202,
|
|
"signal/advantage_pre_scale_abs_mean": 0.16743640899658202,
|
|
"signal/advantage_pre_scale_std": 0.2411186933517456,
|
|
"signal/advantage_std": 0.2411186933517456,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.19905173778533936,
|
|
"signal/batch_coverage_0/group_std_mean": 0.26072719097137453,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.019905174523591994,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.019905174523591994,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.19905173778533936,
|
|
"signal/batch_coverage_1/group_std_mean": 0.26072719097137453,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.019905174523591994,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.019905174523591994,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.21653661429882048,
|
|
"signal/batch_coverage_10/group_std_mean": 0.28195360898971555,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.021653661131858827,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.021653661131858827,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.2188299685716629,
|
|
"signal/batch_coverage_15/group_std_mean": 0.2830525070428848,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.021882996335625647,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.021882996335625647,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.2214237332344055,
|
|
"signal/batch_coverage_20/group_std_mean": 0.2871974349021912,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.022142373397946356,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.022142373397946356,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.2215735673904419,
|
|
"signal/batch_coverage_25/group_std_mean": 0.28793606758117674,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.022157356888055802,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.022157356888055802,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.20815829634666444,
|
|
"signal/batch_coverage_5/group_std_mean": 0.27133035063743594,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.020815829932689666,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.020815829932689666,
|
|
"signal/brier_reward/centered_abs_mean": 0.17668550610542297,
|
|
"signal/brier_reward/group_std_mean": 0.2282479226589203,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017668551579117776,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017668551579117776,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.10292446613311768,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.12634139955043794,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010292447078973055,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010292447078973055,
|
|
"signal/format_reward/centered_abs_mean": 0.00225830078125,
|
|
"signal/format_reward/group_std_mean": 0.0062928176019340755,
|
|
"signal/format_reward/group_zero_std_frac": 0.965625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001129150390625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.001129150390625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003706426313146949,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005361904297024012,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.633032949641347e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.633032949641347e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.043428726494312286,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06040780916810036,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004342872835695744,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004342872835695744,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.42182513475418093,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4972574055194855,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0421825148165226,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0421825148165226,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.38642271935922723,
|
|
"calibration/batch_distribution_entropy": 0.7343121751663448,
|
|
"calibration/buffer_distribution_entropy": 0.7698096977080258,
|
|
"calibration/confidence_entropy": 0.24101454783337828,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.015655577299412915,
|
|
"calibration/coverage@20%": 0.07475538160469666,
|
|
"calibration/coverage@25%": 0.12211350293542074,
|
|
"calibration/coverage@30%": 0.3546370474559687,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.15231339925359932,
|
|
"calibration/mean_confidence": 0.5120636185686182,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00107421875,
|
|
"completions/max_length": 774.6,
|
|
"completions/max_terminated_length": 774.6,
|
|
"completions/mean_length": 192.52734375,
|
|
"completions/mean_terminated_length": 192.73372497558594,
|
|
"completions/min_length": 16.8,
|
|
"completions/min_terminated_length": 88.8,
|
|
"epoch": 0.288,
|
|
"grad_norm": 0.003030631458386779,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0009,
|
|
"num_tokens": 301234107.0,
|
|
"reward": 1.1730293035507202,
|
|
"reward_std": 0.21305195093154908,
|
|
"rewards/accuracy_reward": 0.39794921875,
|
|
"rewards/batch_coverage_0": 0.4744568645954132,
|
|
"rewards/batch_coverage_1": 0.4744568645954132,
|
|
"rewards/batch_coverage_10": 0.5303399085998535,
|
|
"rewards/batch_coverage_15": 0.5415215492248535,
|
|
"rewards/batch_coverage_20": 0.551158607006073,
|
|
"rewards/batch_coverage_25": 0.5548080563545227,
|
|
"rewards/batch_coverage_5": 0.5028053164482117,
|
|
"rewards/brier_reward": 0.8083776473999024,
|
|
"rewards/confidence_uniqueness_reward": 0.8495068907737732,
|
|
"rewards/format_reward": 0.998828125,
|
|
"rewards/frontier_aurc_reward": -0.00370310852304101,
|
|
"rewards/frontier_ece_reward": 0.020712151564657687,
|
|
"rewards/frontier_entropy_batch_reward": -0.561275064945221,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.138385009765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1806561380624771,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.496875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0691925048828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0691925048828125,
|
|
"signal/advantage_abs_mean": 0.16109004020690917,
|
|
"signal/advantage_pre_scale_abs_mean": 0.16109004020690917,
|
|
"signal/advantage_pre_scale_std": 0.2384302794933319,
|
|
"signal/advantage_std": 0.2384302794933319,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.18493348658084868,
|
|
"signal/batch_coverage_0/group_std_mean": 0.24907850623130798,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.018493348360061647,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.018493348360061647,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.18493348658084868,
|
|
"signal/batch_coverage_1/group_std_mean": 0.24907850623130798,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.018493348360061647,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.018493348360061647,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.19726809859275818,
|
|
"signal/batch_coverage_10/group_std_mean": 0.26376489698886874,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.019726810976862907,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.019726810976862907,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.20315858125686645,
|
|
"signal/batch_coverage_15/group_std_mean": 0.27094233632087705,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.020315859094262124,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.020315859094262124,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.21178129315376282,
|
|
"signal/batch_coverage_20/group_std_mean": 0.28007384538650515,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.02117813006043434,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.02117813006043434,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.21607309579849243,
|
|
"signal/batch_coverage_25/group_std_mean": 0.2853268563747406,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.021607310324907304,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.021607310324907304,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.19155743420124055,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2568057715892792,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01915574409067631,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.01915574409067631,
|
|
"signal/brier_reward/centered_abs_mean": 0.1633100241422653,
|
|
"signal/brier_reward/group_std_mean": 0.21677840054035186,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016331002302467822,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016331002302467822,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0984340637922287,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1197776436805725,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009843406639993191,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009843406639993191,
|
|
"signal/format_reward/centered_abs_mean": 0.0022705078125,
|
|
"signal/format_reward/group_std_mean": 0.006629125867038965,
|
|
"signal/format_reward/group_zero_std_frac": 0.9625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00113525390625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0035885621327906847,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005224420595914126,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.485702738747932e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.485702738747932e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04839966595172882,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07236144691705704,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0048399668186903,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0048399668186903,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.41489365696907043,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.49374261498451233,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.041489367932081224,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.041489367932081224,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3088800902089267,
|
|
"calibration/batch_distribution_entropy": 0.7244132146017391,
|
|
"calibration/buffer_distribution_entropy": 0.7693529126591884,
|
|
"calibration/confidence_entropy": 0.23922881391576922,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.14240230552837574,
|
|
"calibration/coverage@15%": 0.23816734955968685,
|
|
"calibration/coverage@20%": 0.2624151479941291,
|
|
"calibration/coverage@25%": 0.3132254464285714,
|
|
"calibration/coverage@30%": 0.5234031005381604,
|
|
"calibration/coverage@5%": 0.05593658268101761,
|
|
"calibration/ece": 0.13676851084610558,
|
|
"calibration/mean_confidence": 0.5120746492478866,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00068359375,
|
|
"completions/max_length": 617.4,
|
|
"completions/max_terminated_length": 617.4,
|
|
"completions/mean_length": 201.907421875,
|
|
"completions/mean_terminated_length": 202.04771118164064,
|
|
"completions/min_length": 17.4,
|
|
"completions/min_terminated_length": 90.2,
|
|
"epoch": 0.304,
|
|
"grad_norm": 0.0020372753497213125,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 318231591.0,
|
|
"reward": 1.168937587738037,
|
|
"reward_std": 0.21276797950267792,
|
|
"rewards/accuracy_reward": 0.4037109375,
|
|
"rewards/batch_coverage_0": 0.4556680262088776,
|
|
"rewards/batch_coverage_1": 0.4556680262088776,
|
|
"rewards/batch_coverage_10": 0.5263420939445496,
|
|
"rewards/batch_coverage_15": 0.5296601533889771,
|
|
"rewards/batch_coverage_20": 0.5361104488372803,
|
|
"rewards/batch_coverage_25": 0.538708108663559,
|
|
"rewards/batch_coverage_5": 0.5019454300403595,
|
|
"rewards/brier_reward": 0.8099380731582642,
|
|
"rewards/confidence_uniqueness_reward": 0.838314151763916,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.0033911903388798236,
|
|
"rewards/frontier_ece_reward": 0.020074135437607766,
|
|
"rewards/frontier_entropy_batch_reward": -0.5372773051261902,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.13790283203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.18555650413036345,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.068951416015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.068951416015625,
|
|
"signal/advantage_abs_mean": 0.16150518357753754,
|
|
"signal/advantage_pre_scale_abs_mean": 0.16150518357753754,
|
|
"signal/advantage_pre_scale_std": 0.23679069578647613,
|
|
"signal/advantage_std": 0.23679069578647613,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.19015144407749177,
|
|
"signal/batch_coverage_0/group_std_mean": 0.24815911054611206,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01901514418423176,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.01901514418423176,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.19015144407749177,
|
|
"signal/batch_coverage_1/group_std_mean": 0.24815911054611206,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01901514418423176,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.01901514418423176,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.21041842699050903,
|
|
"signal/batch_coverage_10/group_std_mean": 0.2728098422288895,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.02104184255003929,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.02104184255003929,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.20971550941467285,
|
|
"signal/batch_coverage_15/group_std_mean": 0.27206642627716066,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.020971550419926644,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.020971550419926644,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.21126347482204438,
|
|
"signal/batch_coverage_20/group_std_mean": 0.2744140148162842,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.021126347407698632,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.021126347407698632,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.21306751370429994,
|
|
"signal/batch_coverage_25/group_std_mean": 0.27628334164619445,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.021306751295924187,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.021306751295924187,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.2033386319875717,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2633496135473251,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.020333864167332648,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.020333864167332648,
|
|
"signal/brier_reward/centered_abs_mean": 0.1642611026763916,
|
|
"signal/brier_reward/group_std_mean": 0.21632223427295685,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01642611101269722,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01642611101269722,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.10172502249479294,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1260932356119156,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010172502510249615,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010172502510249615,
|
|
"signal/format_reward/centered_abs_mean": 0.001513671875,
|
|
"signal/format_reward/group_std_mean": 0.004419417260214687,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029861139133572578,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00451540406793356,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7326425808714705e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7326425808714705e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.043954838812351224,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06721483990550041,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004395484086126089,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004395484086126089,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4110023260116577,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.48685582876205447,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.041100232303142546,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.041100232303142546,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31066769725534477,
|
|
"calibration/batch_distribution_entropy": 0.7408097644771414,
|
|
"calibration/buffer_distribution_entropy": 0.7695192210995658,
|
|
"calibration/confidence_entropy": 0.24489852964300565,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.101171875,
|
|
"calibration/coverage@15%": 0.3365475171232877,
|
|
"calibration/coverage@20%": 0.3885350415851272,
|
|
"calibration/coverage@25%": 0.43192346501956946,
|
|
"calibration/coverage@30%": 0.4780447345890411,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.16255623477764822,
|
|
"calibration/mean_confidence": 0.5092909846624645,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00087890625,
|
|
"completions/max_length": 623.0,
|
|
"completions/max_terminated_length": 623.0,
|
|
"completions/mean_length": 203.51396484375,
|
|
"completions/mean_terminated_length": 203.69292907714845,
|
|
"completions/min_length": 16.8,
|
|
"completions/min_terminated_length": 83.4,
|
|
"epoch": 0.32,
|
|
"grad_norm": 0.001760556478984654,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 335404278.0,
|
|
"reward": 1.2145358324050903,
|
|
"reward_std": 0.20452672243118286,
|
|
"rewards/accuracy_reward": 0.44296875,
|
|
"rewards/batch_coverage_0": 0.49820048213005064,
|
|
"rewards/batch_coverage_1": 0.49820048213005064,
|
|
"rewards/batch_coverage_10": 0.556191599369049,
|
|
"rewards/batch_coverage_15": 0.5629641294479371,
|
|
"rewards/batch_coverage_20": 0.5709343194961548,
|
|
"rewards/batch_coverage_25": 0.5740790128707886,
|
|
"rewards/batch_coverage_5": 0.5346252918243408,
|
|
"rewards/brier_reward": 0.810669207572937,
|
|
"rewards/confidence_uniqueness_reward": 0.8535032391548156,
|
|
"rewards/format_reward": 0.99873046875,
|
|
"rewards/frontier_aurc_reward": -0.0030829327646642924,
|
|
"rewards/frontier_ece_reward": 0.027932414039969444,
|
|
"rewards/frontier_entropy_batch_reward": -0.5500531315803527,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.11822509765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.15758318901062013,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.546875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.059112548828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.059112548828125,
|
|
"signal/advantage_abs_mean": 0.15452314913272858,
|
|
"signal/advantage_pre_scale_abs_mean": 0.15452314913272858,
|
|
"signal/advantage_pre_scale_std": 0.23202507495880126,
|
|
"signal/advantage_std": 0.23202507495880126,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.17627234160900115,
|
|
"signal/batch_coverage_0/group_std_mean": 0.23309228122234343,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017627234011888503,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.017627234011888503,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.17627234160900115,
|
|
"signal/batch_coverage_1/group_std_mean": 0.23309228122234343,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017627234011888503,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.017627234011888503,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.19112071096897126,
|
|
"signal/batch_coverage_10/group_std_mean": 0.2518125683069229,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01911207064986229,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.01911207064986229,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.19524294137954712,
|
|
"signal/batch_coverage_15/group_std_mean": 0.2568311929702759,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.019524294510483743,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.019524294510483743,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.19963866770267485,
|
|
"signal/batch_coverage_20/group_std_mean": 0.2630015403032303,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.019963867589831354,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.019963867589831354,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.2009682387113571,
|
|
"signal/batch_coverage_25/group_std_mean": 0.2646039962768555,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.020096823945641516,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.020096823945641516,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.18442264795303345,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2439052492380142,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.018442264944314956,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.018442264944314956,
|
|
"signal/brier_reward/centered_abs_mean": 0.16162406504154206,
|
|
"signal/brier_reward/group_std_mean": 0.2116384744644165,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01616240683943033,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01616240683943033,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09289654493331909,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.11365949809551239,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009289654716849328,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009289654716849328,
|
|
"signal/format_reward/centered_abs_mean": 0.002447509765625,
|
|
"signal/format_reward/group_std_mean": 0.006845244579017163,
|
|
"signal/format_reward/group_zero_std_frac": 0.9625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012237548828125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0012237548828125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030647614039480686,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004521076194941997,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.830951754935086e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.830951754935086e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04688786193728447,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07207571268081665,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004688786249607802,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004688786249607802,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.40426061153411863,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4829357087612152,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.040426061302423474,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.040426061302423474,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"eval_calibration/aurc": 0.569440617189013,
|
|
"eval_calibration/batch_distribution_entropy": 0.6703147758360166,
|
|
"eval_calibration/buffer_distribution_entropy": 0.7686206731225668,
|
|
"eval_calibration/confidence_entropy": 0.23202965696476052,
|
|
"eval_calibration/coverage@0%": 0.0,
|
|
"eval_calibration/coverage@1%": 0.0,
|
|
"eval_calibration/coverage@10%": 0.0,
|
|
"eval_calibration/coverage@15%": 0.0703125,
|
|
"eval_calibration/coverage@20%": 0.078125,
|
|
"eval_calibration/coverage@25%": 0.09375,
|
|
"eval_calibration/coverage@30%": 0.203125,
|
|
"eval_calibration/coverage@5%": 0.0,
|
|
"eval_calibration/ece": 0.2418025333544978,
|
|
"eval_calibration/mean_confidence": 0.43286503335449783,
|
|
"eval_completions/clipped_ratio": 0.001953125,
|
|
"eval_completions/max_length": 510.25,
|
|
"eval_completions/max_terminated_length": 510.25,
|
|
"eval_completions/mean_length": 205.94585037231445,
|
|
"eval_completions/mean_terminated_length": 206.36303329467773,
|
|
"eval_completions/min_length": 82.5,
|
|
"eval_completions/min_terminated_length": 107.25,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 335404278.0,
|
|
"eval_reward": 0.9630002677440643,
|
|
"eval_reward_std": 0.27110686898231506,
|
|
"eval_rewards/accuracy_reward": 0.33984375,
|
|
"eval_rewards/batch_coverage_0": 0.35313040763139725,
|
|
"eval_rewards/batch_coverage_1": 0.35313040763139725,
|
|
"eval_rewards/batch_coverage_10": 0.35056068003177643,
|
|
"eval_rewards/batch_coverage_15": 0.32362698018550873,
|
|
"eval_rewards/batch_coverage_20": 0.3047519400715828,
|
|
"eval_rewards/batch_coverage_25": 0.2832332253456116,
|
|
"eval_rewards/batch_coverage_5": 0.35313040763139725,
|
|
"eval_rewards/brier_reward": 0.7975546419620514,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8064673691987991,
|
|
"eval_rewards/format_reward": 0.998046875,
|
|
"eval_rewards/frontier_aurc_reward": -0.00390950427390635,
|
|
"eval_rewards/frontier_ece_reward": 0.013499133347067982,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.998046875,
|
|
"eval_runtime": 30.6636,
|
|
"eval_samples_per_second": 16.306,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.440185546875,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.47584959864616394,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2200927734375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2200927734375,
|
|
"eval_signal/advantage_abs_mean": 0.22441067546606064,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.22441067546606064,
|
|
"eval_signal/advantage_pre_scale_std": 0.269348181784153,
|
|
"eval_signal/advantage_std": 0.269348181784153,
|
|
"eval_signal/batch_coverage_0/centered_abs_mean": 0.4600137919187546,
|
|
"eval_signal/batch_coverage_0/group_std_mean": 0.5194766819477081,
|
|
"eval_signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04600138030946255,
|
|
"eval_signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.04600138030946255,
|
|
"eval_signal/batch_coverage_1/centered_abs_mean": 0.4600137919187546,
|
|
"eval_signal/batch_coverage_1/group_std_mean": 0.5194766819477081,
|
|
"eval_signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04600138030946255,
|
|
"eval_signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.04600138030946255,
|
|
"eval_signal/batch_coverage_10/centered_abs_mean": 0.457081101834774,
|
|
"eval_signal/batch_coverage_10/group_std_mean": 0.5163652151823044,
|
|
"eval_signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.045708111487329006,
|
|
"eval_signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.045708111487329006,
|
|
"eval_signal/batch_coverage_15/centered_abs_mean": 0.41683706641197205,
|
|
"eval_signal/batch_coverage_15/group_std_mean": 0.4740506485104561,
|
|
"eval_signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04168370831757784,
|
|
"eval_signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.04168370831757784,
|
|
"eval_signal/batch_coverage_20/centered_abs_mean": 0.39310214668512344,
|
|
"eval_signal/batch_coverage_20/group_std_mean": 0.4481876716017723,
|
|
"eval_signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03931021690368652,
|
|
"eval_signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.03931021690368652,
|
|
"eval_signal/batch_coverage_25/centered_abs_mean": 0.369586318731308,
|
|
"eval_signal/batch_coverage_25/group_std_mean": 0.4246791750192642,
|
|
"eval_signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03695863112807274,
|
|
"eval_signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.03695863112807274,
|
|
"eval_signal/batch_coverage_5/centered_abs_mean": 0.4600137919187546,
|
|
"eval_signal/batch_coverage_5/group_std_mean": 0.5194766819477081,
|
|
"eval_signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04600138030946255,
|
|
"eval_signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.04600138030946255,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.26220837235450745,
|
|
"eval_signal/brier_reward/group_std_mean": 0.3284427151083946,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026220838073641062,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.026220838073641062,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.13375214487314224,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.15917536616325378,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013375215232372284,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013375215232372284,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
|
|
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0043173496960662305,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.006625176058150828,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3966873565514106e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3966873565514106e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.05632109194993973,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.0933114867657423,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00563210912514478,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00563210912514478,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0037841796875,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.011048543266952038,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9375,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0003784179862122983,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0003784179862122983,
|
|
"eval_steps_per_second": 0.13,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calibration/aurc": 0.35455243453590024,
|
|
"calibration/batch_distribution_entropy": 0.7786134085679792,
|
|
"calibration/buffer_distribution_entropy": 0.768577979834517,
|
|
"calibration/confidence_entropy": 0.2686558213720398,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.07123287671232877,
|
|
"calibration/coverage@15%": 0.08180039138943249,
|
|
"calibration/coverage@20%": 0.09041095890410958,
|
|
"calibration/coverage@25%": 0.21627181780246346,
|
|
"calibration/coverage@30%": 0.43795605142742033,
|
|
"calibration/coverage@5%": 0.020743639921722113,
|
|
"calibration/ece": 0.16696759740323017,
|
|
"calibration/mean_confidence": 0.45578866359148174,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00078125,
|
|
"completions/max_length": 541.4,
|
|
"completions/max_terminated_length": 541.4,
|
|
"completions/mean_length": 206.63544921875,
|
|
"completions/mean_terminated_length": 206.79442749023437,
|
|
"completions/min_length": 53.0,
|
|
"completions/min_terminated_length": 89.2,
|
|
"epoch": 0.336,
|
|
"grad_norm": 0.0022354216780513525,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 352242657.0,
|
|
"reward": 1.192155623435974,
|
|
"reward_std": 0.19818204939365386,
|
|
"rewards/accuracy_reward": 0.45048828125,
|
|
"rewards/batch_coverage_0": 0.4518804371356964,
|
|
"rewards/batch_coverage_1": 0.4518804371356964,
|
|
"rewards/batch_coverage_10": 0.5132352769374847,
|
|
"rewards/batch_coverage_15": 0.5222086906433105,
|
|
"rewards/batch_coverage_20": 0.5237145602703095,
|
|
"rewards/batch_coverage_25": 0.5243976712226868,
|
|
"rewards/batch_coverage_5": 0.4776782333850861,
|
|
"rewards/brier_reward": 0.7963641285896301,
|
|
"rewards/confidence_uniqueness_reward": 0.8801257133483886,
|
|
"rewards/format_reward": 0.99912109375,
|
|
"rewards/frontier_aurc_reward": -0.003094083955511451,
|
|
"rewards/frontier_ece_reward": 0.024742235243320466,
|
|
"rewards/frontier_entropy_batch_reward": -0.4923313021659851,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.129144287109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.17095574140548705,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0645721435546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0645721435546875,
|
|
"signal/advantage_abs_mean": 0.15149070620536803,
|
|
"signal/advantage_pre_scale_abs_mean": 0.15149070620536803,
|
|
"signal/advantage_pre_scale_std": 0.21988919079303743,
|
|
"signal/advantage_std": 0.21988919079303743,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.18348737359046935,
|
|
"signal/batch_coverage_0/group_std_mean": 0.24079204499721527,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.018348737433552743,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.018348737433552743,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.18348737359046935,
|
|
"signal/batch_coverage_1/group_std_mean": 0.24079204499721527,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.018348737433552743,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.018348737433552743,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.19880570769309996,
|
|
"signal/batch_coverage_10/group_std_mean": 0.26094971001148226,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.019880571216344834,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.019880571216344834,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.20224025547504426,
|
|
"signal/batch_coverage_15/group_std_mean": 0.26526222229003904,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.02022402621805668,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.02022402621805668,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.20068714618682862,
|
|
"signal/batch_coverage_20/group_std_mean": 0.2642544090747833,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.020068715140223504,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.020068715140223504,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.20156602263450624,
|
|
"signal/batch_coverage_25/group_std_mean": 0.26549296975135805,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.020156602561473846,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.020156602561473846,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.19100910425186157,
|
|
"signal/batch_coverage_5/group_std_mean": 0.24971230030059816,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.019100910797715188,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.019100910797715188,
|
|
"signal/brier_reward/centered_abs_mean": 0.16946334838867189,
|
|
"signal/brier_reward/group_std_mean": 0.2165478616952896,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016946335881948472,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016946335881948472,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07198369279503822,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09141376763582229,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0071983693167567255,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0071983693167567255,
|
|
"signal/format_reward/centered_abs_mean": 0.001702880859375,
|
|
"signal/format_reward/group_std_mean": 0.004971844330430031,
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003002920467406511,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004374483227729797,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7536507443292065e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7536507443292065e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04616122543811798,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07393098026514053,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004616122413426638,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004616122413426638,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.391942685842514,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.46693851351737975,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03919426798820495,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03919426798820495,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.35680028964650196,
|
|
"calibration/batch_distribution_entropy": 0.7071003218034573,
|
|
"calibration/buffer_distribution_entropy": 0.7643713160726849,
|
|
"calibration/confidence_entropy": 0.23301250797379686,
|
|
"calibration/coverage@0%": 0.025415851272015656,
|
|
"calibration/coverage@1%": 0.025415851272015656,
|
|
"calibration/coverage@10%": 0.1735047700587084,
|
|
"calibration/coverage@15%": 0.21611041462818004,
|
|
"calibration/coverage@20%": 0.3301866744129159,
|
|
"calibration/coverage@25%": 0.4040285591976517,
|
|
"calibration/coverage@30%": 0.4364657228473581,
|
|
"calibration/coverage@5%": 0.10628439946183951,
|
|
"calibration/ece": 0.12169558641514251,
|
|
"calibration/mean_confidence": 0.3869068004266283,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00078125,
|
|
"completions/max_length": 714.0,
|
|
"completions/max_terminated_length": 714.0,
|
|
"completions/mean_length": 204.9240234375,
|
|
"completions/mean_terminated_length": 205.08332214355468,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 93.0,
|
|
"epoch": 0.352,
|
|
"grad_norm": 0.0022575741168111563,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0007,
|
|
"num_tokens": 369601495.0,
|
|
"reward": 1.1860121488571167,
|
|
"reward_std": 0.1875196486711502,
|
|
"rewards/accuracy_reward": 0.4072265625,
|
|
"rewards/batch_coverage_0": 0.482658326625824,
|
|
"rewards/batch_coverage_1": 0.482658326625824,
|
|
"rewards/batch_coverage_10": 0.5309643864631652,
|
|
"rewards/batch_coverage_15": 0.5396885633468628,
|
|
"rewards/batch_coverage_20": 0.5477299809455871,
|
|
"rewards/batch_coverage_25": 0.5511409044265747,
|
|
"rewards/batch_coverage_5": 0.5098637223243714,
|
|
"rewards/brier_reward": 0.8044361114501953,
|
|
"rewards/confidence_uniqueness_reward": 0.8674160838127136,
|
|
"rewards/format_reward": 0.99912109375,
|
|
"rewards/frontier_aurc_reward": -0.0032843008171766996,
|
|
"rewards/frontier_ece_reward": 0.020676460489630698,
|
|
"rewards/frontier_entropy_batch_reward": -0.5084396362304687,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.11280517578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.14706921875476836,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.056402587890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.056402587890625,
|
|
"signal/advantage_abs_mean": 0.14266299903392793,
|
|
"signal/advantage_pre_scale_abs_mean": 0.14266299903392793,
|
|
"signal/advantage_pre_scale_std": 0.21263113021850585,
|
|
"signal/advantage_std": 0.21263113021850585,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.1743027001619339,
|
|
"signal/batch_coverage_0/group_std_mean": 0.23004747033119202,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017430270463228224,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.017430270463228224,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.1743027001619339,
|
|
"signal/batch_coverage_1/group_std_mean": 0.23004747033119202,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017430270463228224,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.017430270463228224,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.18721598982810975,
|
|
"signal/batch_coverage_10/group_std_mean": 0.24640632271766663,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.018721599504351615,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.018721599504351615,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.18796933591365814,
|
|
"signal/batch_coverage_15/group_std_mean": 0.24771904051303864,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01879693418741226,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.01879693418741226,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.1884627491235733,
|
|
"signal/batch_coverage_20/group_std_mean": 0.24858099222183228,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01884627602994442,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.01884627602994442,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.192040291428566,
|
|
"signal/batch_coverage_25/group_std_mean": 0.25262742638587954,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01920402981340885,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01920402981340885,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.18090624809265138,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2380412310361862,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01809062510728836,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.01809062510728836,
|
|
"signal/brier_reward/centered_abs_mean": 0.15644164383411407,
|
|
"signal/brier_reward/group_std_mean": 0.20135876834392546,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015644165128469466,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015644165128469466,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08378114998340606,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10465183854103088,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008378114923834801,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008378114923834801,
|
|
"signal/format_reward/centered_abs_mean": 0.001690673828125,
|
|
"signal/format_reward/group_std_mean": 0.004635535972192883,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008453369140625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008453369140625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0031608616933226585,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004724891297519207,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.951077233068645e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.951077233068645e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03993298932909965,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06318138912320137,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003993298951536417,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003993298951536417,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3889405906200409,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4657216012477875,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03889405876398087,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03889405876398087,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.42205895128309256,
|
|
"calibration/batch_distribution_entropy": 0.7692756274501231,
|
|
"calibration/buffer_distribution_entropy": 0.7559267013034778,
|
|
"calibration/confidence_entropy": 0.2641741483942744,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.051953125,
|
|
"calibration/coverage@15%": 0.092578125,
|
|
"calibration/coverage@20%": 0.245703125,
|
|
"calibration/coverage@25%": 0.291796875,
|
|
"calibration/coverage@30%": 0.32421875,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.16944534321662208,
|
|
"calibration/mean_confidence": 0.48004563898771097,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0009765625,
|
|
"completions/max_length": 604.4,
|
|
"completions/max_terminated_length": 604.4,
|
|
"completions/mean_length": 200.35048828125,
|
|
"completions/mean_terminated_length": 200.54432678222656,
|
|
"completions/min_length": 35.4,
|
|
"completions/min_terminated_length": 88.6,
|
|
"epoch": 0.368,
|
|
"grad_norm": 0.001792994444258511,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0006,
|
|
"num_tokens": 386718556.0,
|
|
"reward": 1.2112500190734863,
|
|
"reward_std": 0.187380114197731,
|
|
"rewards/accuracy_reward": 0.4390625,
|
|
"rewards/batch_coverage_0": 0.4950934827327728,
|
|
"rewards/batch_coverage_1": 0.4950934827327728,
|
|
"rewards/batch_coverage_10": 0.5496911406517029,
|
|
"rewards/batch_coverage_15": 0.5525175213813782,
|
|
"rewards/batch_coverage_20": 0.5536886215209961,
|
|
"rewards/batch_coverage_25": 0.5523674130439759,
|
|
"rewards/batch_coverage_5": 0.5226927876472474,
|
|
"rewards/brier_reward": 0.7964340925216675,
|
|
"rewards/confidence_uniqueness_reward": 0.8855133891105652,
|
|
"rewards/format_reward": 0.998828125,
|
|
"rewards/frontier_aurc_reward": -0.0035252573899924753,
|
|
"rewards/frontier_ece_reward": 0.020503921248018742,
|
|
"rewards/frontier_entropy_batch_reward": -0.5001079976558686,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1037353515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.13716953694820405,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05186767578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05186767578125,
|
|
"signal/advantage_abs_mean": 0.14186038076877594,
|
|
"signal/advantage_pre_scale_abs_mean": 0.14186038076877594,
|
|
"signal/advantage_pre_scale_std": 0.21083785593509674,
|
|
"signal/advantage_std": 0.21083785593509674,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.18369400203227998,
|
|
"signal/batch_coverage_0/group_std_mean": 0.23681255280971528,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.018369400501251222,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.018369400501251222,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.18369400203227998,
|
|
"signal/batch_coverage_1/group_std_mean": 0.23681255280971528,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.018369400501251222,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.018369400501251222,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.195779687166214,
|
|
"signal/batch_coverage_10/group_std_mean": 0.2538564056158066,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.019577968493103982,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.019577968493103982,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.1936182737350464,
|
|
"signal/batch_coverage_15/group_std_mean": 0.25214785933494566,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01936182826757431,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.01936182826757431,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.194045352935791,
|
|
"signal/batch_coverage_20/group_std_mean": 0.25312704145908355,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.019404535368084907,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.019404535368084907,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.18496437072753907,
|
|
"signal/batch_coverage_25/group_std_mean": 0.2431274473667145,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01849643774330616,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01849643774330616,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.19195379316806793,
|
|
"signal/batch_coverage_5/group_std_mean": 0.24739271104335786,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.019195379316806795,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.019195379316806795,
|
|
"signal/brier_reward/centered_abs_mean": 0.15315645933151245,
|
|
"signal/brier_reward/group_std_mean": 0.19987372756004335,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01531564611941576,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01531564611941576,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07148051410913467,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08934253603219985,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00714805144816637,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00714805144816637,
|
|
"signal/format_reward/centered_abs_mean": 0.0022216796875,
|
|
"signal/format_reward/group_std_mean": 0.005560987815260887,
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00111083984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00111083984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036128945648670197,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005264179687947035,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.516118278843351e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.516118278843351e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03991732746362686,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0683354414999485,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00399173297919333,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00399173297919333,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3917108952999115,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4644228458404541,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03917108997702599,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03917108997702599,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3312790981825941,
|
|
"calibration/batch_distribution_entropy": 0.7078424659451629,
|
|
"calibration/buffer_distribution_entropy": 0.746863848902535,
|
|
"calibration/confidence_entropy": 0.2260585576901173,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.20234375,
|
|
"calibration/coverage@15%": 0.257421875,
|
|
"calibration/coverage@20%": 0.294921875,
|
|
"calibration/coverage@25%": 0.3265625,
|
|
"calibration/coverage@30%": 0.4231730063600782,
|
|
"calibration/coverage@5%": 0.094140625,
|
|
"calibration/ece": 0.13303364661015177,
|
|
"calibration/mean_confidence": 0.4431268148201619,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 626.8,
|
|
"completions/max_terminated_length": 626.8,
|
|
"completions/mean_length": 198.51220703125,
|
|
"completions/mean_terminated_length": 198.53143615722655,
|
|
"completions/min_length": 70.2,
|
|
"completions/min_terminated_length": 90.0,
|
|
"epoch": 0.384,
|
|
"grad_norm": 0.0019350156653672457,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 403607833.0,
|
|
"reward": 1.2347772121429443,
|
|
"reward_std": 0.1755542814731598,
|
|
"rewards/accuracy_reward": 0.47451171875,
|
|
"rewards/batch_coverage_0": 0.486088764667511,
|
|
"rewards/batch_coverage_1": 0.486088764667511,
|
|
"rewards/batch_coverage_10": 0.5594327092170716,
|
|
"rewards/batch_coverage_15": 0.5647305846214294,
|
|
"rewards/batch_coverage_20": 0.5726596713066101,
|
|
"rewards/batch_coverage_25": 0.5790935397148133,
|
|
"rewards/batch_coverage_5": 0.5290896475315094,
|
|
"rewards/brier_reward": 0.8075304627418518,
|
|
"rewards/confidence_uniqueness_reward": 0.8788026452064515,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0032432018779218197,
|
|
"rewards/frontier_ece_reward": 0.02107427977025509,
|
|
"rewards/frontier_entropy_batch_reward": -0.5079957664012908,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.102032470703125,
|
|
"signal/accuracy_reward/group_std_mean": 0.14291528165340422,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.559375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0510162353515625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0510162353515625,
|
|
"signal/advantage_abs_mean": 0.13100714832544327,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13100714832544327,
|
|
"signal/advantage_pre_scale_std": 0.20000562965869903,
|
|
"signal/advantage_std": 0.20000562965869903,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.1647425413131714,
|
|
"signal/batch_coverage_0/group_std_mean": 0.2197166472673416,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.016474254056811334,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.016474254056811334,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.1647425413131714,
|
|
"signal/batch_coverage_1/group_std_mean": 0.2197166472673416,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.016474254056811334,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.016474254056811334,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.1836285799741745,
|
|
"signal/batch_coverage_10/group_std_mean": 0.2453942656517029,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.018362860009074212,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.018362860009074212,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.18449074327945708,
|
|
"signal/batch_coverage_15/group_std_mean": 0.24565376341342926,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.018449075147509576,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.018449075147509576,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.18724102675914764,
|
|
"signal/batch_coverage_20/group_std_mean": 0.24988237917423248,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.018724103271961213,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.018724103271961213,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.18788727819919587,
|
|
"signal/batch_coverage_25/group_std_mean": 0.25117466747760775,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01878872811794281,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01878872811794281,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.17459736466407777,
|
|
"signal/batch_coverage_5/group_std_mean": 0.23254739940166474,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.017459736764431,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.017459736764431,
|
|
"signal/brier_reward/centered_abs_mean": 0.1415181964635849,
|
|
"signal/brier_reward/group_std_mean": 0.18468352258205414,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014151819795370103,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014151819795370103,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07397404685616493,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09191499650478363,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007397404778748751,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007397404778748751,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002988464618101716,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004501758888363838,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.735580867214594e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.735580867214594e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03291532322764397,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.056967698782682416,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032915322575718165,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032915322575718165,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3701502561569214,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.45215981006622313,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0370150275528431,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0370150275528431,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.42148780620656484,
|
|
"calibration/batch_distribution_entropy": 0.7692129995349444,
|
|
"calibration/buffer_distribution_entropy": 0.7382113920548676,
|
|
"calibration/confidence_entropy": 0.26247756330596594,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.16482077205882353,
|
|
"calibration/coverage@30%": 0.20041819852941173,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.19796225418204746,
|
|
"calibration/mean_confidence": 0.48151096731969645,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 497.8,
|
|
"completions/max_terminated_length": 497.8,
|
|
"completions/mean_length": 196.94169921875,
|
|
"completions/mean_terminated_length": 197.02088928222656,
|
|
"completions/min_length": 77.2,
|
|
"completions/min_terminated_length": 95.8,
|
|
"epoch": 0.4,
|
|
"grad_norm": 0.0016334950923919678,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0011,
|
|
"num_tokens": 420660964.0,
|
|
"reward": 1.2030918598175049,
|
|
"reward_std": 0.1932498872280121,
|
|
"rewards/accuracy_reward": 0.45927734375,
|
|
"rewards/batch_coverage_0": 0.4648968815803528,
|
|
"rewards/batch_coverage_1": 0.4648968815803528,
|
|
"rewards/batch_coverage_10": 0.5263925135135651,
|
|
"rewards/batch_coverage_15": 0.538670289516449,
|
|
"rewards/batch_coverage_20": 0.5505793452262878,
|
|
"rewards/batch_coverage_25": 0.5528130173683167,
|
|
"rewards/batch_coverage_5": 0.4986697494983673,
|
|
"rewards/brier_reward": 0.7922908544540406,
|
|
"rewards/confidence_uniqueness_reward": 0.8596806645393371,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.003926975373178721,
|
|
"rewards/frontier_ece_reward": 0.01958938278257847,
|
|
"rewards/frontier_entropy_batch_reward": -0.531503701210022,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.116705322265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.15439043641090394,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.55625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0583526611328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0583526611328125,
|
|
"signal/advantage_abs_mean": 0.14729376435279845,
|
|
"signal/advantage_pre_scale_abs_mean": 0.14729376435279845,
|
|
"signal/advantage_pre_scale_std": 0.21888698935508727,
|
|
"signal/advantage_std": 0.21888698935508727,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.1779138207435608,
|
|
"signal/batch_coverage_0/group_std_mean": 0.23396467566490173,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017791382595896722,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.017791382595896722,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.1779138207435608,
|
|
"signal/batch_coverage_1/group_std_mean": 0.23396467566490173,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017791382595896722,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.017791382595896722,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.19597465097904204,
|
|
"signal/batch_coverage_10/group_std_mean": 0.2574351608753204,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.019597466662526132,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.019597466662526132,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.1948534607887268,
|
|
"signal/batch_coverage_15/group_std_mean": 0.25582134127616885,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.019485345855355263,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.019485345855355263,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.20348668098449707,
|
|
"signal/batch_coverage_20/group_std_mean": 0.26646647453308103,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.020348669216036796,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.020348669216036796,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.20679988861083984,
|
|
"signal/batch_coverage_25/group_std_mean": 0.2701643168926239,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.020679988712072373,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.020679988712072373,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.18674066066741943,
|
|
"signal/batch_coverage_5/group_std_mean": 0.24514356553554534,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01867406629025936,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.01867406629025936,
|
|
"signal/brier_reward/centered_abs_mean": 0.1615391790866852,
|
|
"signal/brier_reward/group_std_mean": 0.20824775099754333,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016153918392956258,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016153918392956258,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09066232591867447,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.11036855280399323,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009066233038902282,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009066233038902282,
|
|
"signal/format_reward/centered_abs_mean": 0.00074462890625,
|
|
"signal/format_reward/group_std_mean": 0.0018734002485871315,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003549639508128166,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005085006635636091,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.43704950157553e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.43704950157553e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03371543250977993,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05844026431441307,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0033715431578457355,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0033715431578457355,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.37131853103637696,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4504863560199738,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03713185340166092,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03713185340166092,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.36384982656677556,
|
|
"calibration/batch_distribution_entropy": 0.7584043461756151,
|
|
"calibration/buffer_distribution_entropy": 0.7437387048920486,
|
|
"calibration/confidence_entropy": 0.2612899956313475,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.08835769324853229,
|
|
"calibration/coverage@25%": 0.22243303571428572,
|
|
"calibration/coverage@30%": 0.3017727189334638,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.20871118847848963,
|
|
"calibration/mean_confidence": 0.49207635194125976,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 539.4,
|
|
"completions/max_terminated_length": 539.4,
|
|
"completions/mean_length": 199.337890625,
|
|
"completions/mean_terminated_length": 199.41550903320314,
|
|
"completions/min_length": 38.6,
|
|
"completions/min_terminated_length": 92.8,
|
|
"epoch": 0.416,
|
|
"grad_norm": 0.0018792530754581094,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0012,
|
|
"num_tokens": 437583368.0,
|
|
"reward": 1.1987483978271485,
|
|
"reward_std": 0.18848486244678497,
|
|
"rewards/accuracy_reward": 0.4486328125,
|
|
"rewards/batch_coverage_0": 0.4875182926654816,
|
|
"rewards/batch_coverage_1": 0.4875182926654816,
|
|
"rewards/batch_coverage_10": 0.5274044513702393,
|
|
"rewards/batch_coverage_15": 0.5361409902572631,
|
|
"rewards/batch_coverage_20": 0.5449886083602905,
|
|
"rewards/batch_coverage_25": 0.545144772529602,
|
|
"rewards/batch_coverage_5": 0.5098370552062989,
|
|
"rewards/brier_reward": 0.7905206561088562,
|
|
"rewards/confidence_uniqueness_reward": 0.8628069043159485,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0038138974457979204,
|
|
"rewards/frontier_ece_reward": 0.017152907513082027,
|
|
"rewards/frontier_entropy_batch_reward": -0.5617950916290283,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.12532958984375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1621655359864235,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.553125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.062664794921875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.062664794921875,
|
|
"signal/advantage_abs_mean": 0.14475196599960327,
|
|
"signal/advantage_pre_scale_abs_mean": 0.14475196599960327,
|
|
"signal/advantage_pre_scale_std": 0.2165150135755539,
|
|
"signal/advantage_std": 0.2165150135755539,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.1752574473619461,
|
|
"signal/batch_coverage_0/group_std_mean": 0.23077193200588225,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017525745183229448,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.017525745183229448,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.1752574473619461,
|
|
"signal/batch_coverage_1/group_std_mean": 0.23077193200588225,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017525745183229448,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.017525745183229448,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.18302838802337645,
|
|
"signal/batch_coverage_10/group_std_mean": 0.242452073097229,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01830283962190151,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.01830283962190151,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.1806130141019821,
|
|
"signal/batch_coverage_15/group_std_mean": 0.24002839624881744,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.018061301857233047,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.018061301857233047,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.18715793490409852,
|
|
"signal/batch_coverage_20/group_std_mean": 0.2486134171485901,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.018715794384479522,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.018715794384479522,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.18681722283363342,
|
|
"signal/batch_coverage_25/group_std_mean": 0.2479231745004654,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01868172250688076,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01868172250688076,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.17968855500221254,
|
|
"signal/batch_coverage_5/group_std_mean": 0.23696467280387878,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.017968856170773505,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.017968856170773505,
|
|
"signal/brier_reward/centered_abs_mean": 0.16554119884967805,
|
|
"signal/brier_reward/group_std_mean": 0.21077493131160735,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01655412055552006,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01655412055552006,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08097566366195678,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10046249628067017,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008097566477954388,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008097566477954388,
|
|
"signal/format_reward/centered_abs_mean": 0.000933837890625,
|
|
"signal/format_reward/group_std_mean": 0.0024258273653686045,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032779529225081205,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00480236979201436,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0974414150696245e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0974414150696245e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.029453156888484953,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.051109229773283006,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00294531574472785,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00294531574472785,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3639026403427124,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4436567842960358,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.034375,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03639026433229446,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03639026433229446,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2638975584336835,
|
|
"calibration/batch_distribution_entropy": 0.7188330079276255,
|
|
"calibration/buffer_distribution_entropy": 0.7579654834999048,
|
|
"calibration/confidence_entropy": 0.23194149517651214,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.1719285102739726,
|
|
"calibration/coverage@15%": 0.2978420070939335,
|
|
"calibration/coverage@20%": 0.4002568493150685,
|
|
"calibration/coverage@25%": 0.44481256115459883,
|
|
"calibration/coverage@30%": 0.5624258500489236,
|
|
"calibration/coverage@5%": 0.101171875,
|
|
"calibration/ece": 0.15543515240158406,
|
|
"calibration/mean_confidence": 0.5313769595340647,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 578.8,
|
|
"completions/max_terminated_length": 578.8,
|
|
"completions/mean_length": 196.31123046875,
|
|
"completions/mean_terminated_length": 196.3304870605469,
|
|
"completions/min_length": 77.0,
|
|
"completions/min_terminated_length": 95.8,
|
|
"epoch": 0.432,
|
|
"grad_norm": 0.0018460171995684505,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 454607931.0,
|
|
"reward": 1.2525221824645996,
|
|
"reward_std": 0.16612263917922973,
|
|
"rewards/accuracy_reward": 0.49345703125,
|
|
"rewards/batch_coverage_0": 0.5271356880664826,
|
|
"rewards/batch_coverage_1": 0.5271356880664826,
|
|
"rewards/batch_coverage_10": 0.572896933555603,
|
|
"rewards/batch_coverage_15": 0.5773912191390991,
|
|
"rewards/batch_coverage_20": 0.5841198205947876,
|
|
"rewards/batch_coverage_25": 0.5851166129112244,
|
|
"rewards/batch_coverage_5": 0.5565871596336365,
|
|
"rewards/brier_reward": 0.8180199027061462,
|
|
"rewards/confidence_uniqueness_reward": 0.8616907119750976,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0027001814683899283,
|
|
"rewards/frontier_ece_reward": 0.020215665549039842,
|
|
"rewards/frontier_entropy_batch_reward": -0.5710589647293091,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.111077880859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.14017398059368133,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0555389404296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0555389404296875,
|
|
"signal/advantage_abs_mean": 0.12592501640319825,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12592501640319825,
|
|
"signal/advantage_pre_scale_std": 0.2006388008594513,
|
|
"signal/advantage_std": 0.2006388008594513,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.1553127273917198,
|
|
"signal/batch_coverage_0/group_std_mean": 0.20583226680755615,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.015531273372471333,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.015531273372471333,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.1553127273917198,
|
|
"signal/batch_coverage_1/group_std_mean": 0.20583226680755615,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.015531273372471333,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.015531273372471333,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.16552983224391937,
|
|
"signal/batch_coverage_10/group_std_mean": 0.2202944576740265,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.016552984528243542,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.016552984528243542,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.16490549743175506,
|
|
"signal/batch_coverage_15/group_std_mean": 0.21991809606552123,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.016490550339221956,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.016490550339221956,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.16641467809677124,
|
|
"signal/batch_coverage_20/group_std_mean": 0.22252885103225709,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01664146836847067,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.01664146836847067,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.16553274691104888,
|
|
"signal/batch_coverage_25/group_std_mean": 0.22252612709999084,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.016553275845944883,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.016553275845944883,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.15990071445703508,
|
|
"signal/batch_coverage_5/group_std_mean": 0.21237186789512635,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.015990072302520274,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.015990072302520274,
|
|
"signal/brier_reward/centered_abs_mean": 0.14135312736034394,
|
|
"signal/brier_reward/group_std_mean": 0.1822331041097641,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014135313406586646,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014135313406586646,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07747237235307694,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09563995599746704,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007747237477451563,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007747237477451563,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024171784985810517,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035217163152992726,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0214731305022723e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0214731305022723e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.027930035442113876,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.050855685770511624,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002793003572151065,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002793003572151065,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.355313766002655,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.43729751706123354,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.021875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0355313777923584,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0355313777923584,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2780457111426582,
|
|
"calibration/batch_distribution_entropy": 0.7576471482008242,
|
|
"calibration/buffer_distribution_entropy": 0.7712803345094807,
|
|
"calibration/confidence_entropy": 0.2737813753667616,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.03203125,
|
|
"calibration/coverage@15%": 0.1234375,
|
|
"calibration/coverage@20%": 0.24453125,
|
|
"calibration/coverage@25%": 0.47265625,
|
|
"calibration/coverage@30%": 0.568359375,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.18375648447289655,
|
|
"calibration/mean_confidence": 0.5859185726642646,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 542.2,
|
|
"completions/max_terminated_length": 542.2,
|
|
"completions/mean_length": 199.802734375,
|
|
"completions/mean_terminated_length": 199.82284851074218,
|
|
"completions/min_length": 76.0,
|
|
"completions/min_terminated_length": 76.4,
|
|
"epoch": 0.448,
|
|
"grad_norm": 0.002454441273584962,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 471606711.0,
|
|
"reward": 1.2395191192626953,
|
|
"reward_std": 0.18239690661430358,
|
|
"rewards/accuracy_reward": 0.47587890625,
|
|
"rewards/batch_coverage_0": 0.5014855623245239,
|
|
"rewards/batch_coverage_1": 0.5014855623245239,
|
|
"rewards/batch_coverage_10": 0.5618399024009705,
|
|
"rewards/batch_coverage_15": 0.5709842443466187,
|
|
"rewards/batch_coverage_20": 0.5739490628242493,
|
|
"rewards/batch_coverage_25": 0.575732946395874,
|
|
"rewards/batch_coverage_5": 0.5394026637077332,
|
|
"rewards/brier_reward": 0.8151318907737732,
|
|
"rewards/confidence_uniqueness_reward": 0.899455189704895,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.002985543245449662,
|
|
"rewards/frontier_ece_reward": 0.0208277877420187,
|
|
"rewards/frontier_entropy_batch_reward": -0.542172086238861,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.103057861328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1395617365837097,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.584375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0515289306640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0515289306640625,
|
|
"signal/advantage_abs_mean": 0.13970653414726258,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13970653414726258,
|
|
"signal/advantage_pre_scale_std": 0.20998115837574005,
|
|
"signal/advantage_std": 0.20998115837574005,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.17447925209999085,
|
|
"signal/batch_coverage_0/group_std_mean": 0.2283736675977707,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017447925359010696,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.017447925359010696,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.17447925209999085,
|
|
"signal/batch_coverage_1/group_std_mean": 0.2283736675977707,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017447925359010696,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.017447925359010696,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.19056746661663054,
|
|
"signal/batch_coverage_10/group_std_mean": 0.24970765709877013,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01905674673616886,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.01905674673616886,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.19551347494125365,
|
|
"signal/batch_coverage_15/group_std_mean": 0.25578183233737944,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01955134831368923,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.01955134831368923,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.19734205305576324,
|
|
"signal/batch_coverage_20/group_std_mean": 0.2576554536819458,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.019734205678105354,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.019734205678105354,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.19722734689712523,
|
|
"signal/batch_coverage_25/group_std_mean": 0.2575598418712616,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01972273513674736,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01972273513674736,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.18471661508083342,
|
|
"signal/batch_coverage_5/group_std_mean": 0.24224046170711516,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01847166083753109,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.01847166083753109,
|
|
"signal/brier_reward/centered_abs_mean": 0.15095800459384917,
|
|
"signal/brier_reward/group_std_mean": 0.19546601474285125,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015095800533890725,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015095800533890725,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.052432583272457124,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06639125794172288,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005243258271366358,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005243258271366358,
|
|
"signal/format_reward/centered_abs_mean": 0.00072021484375,
|
|
"signal/format_reward/group_std_mean": 0.001477878913283348,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000360107421875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000360107421875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022633123211562634,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0034012056421488523,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8291404669289476e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8291404669289476e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.032904643565416336,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.059289424866437915,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00329046449624002,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00329046449624002,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36733065247535707,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.44678170084953306,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03673306554555893,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03673306554555893,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4133911892897408,
|
|
"calibration/batch_distribution_entropy": 0.7790395321052032,
|
|
"calibration/buffer_distribution_entropy": 0.7791011137289166,
|
|
"calibration/confidence_entropy": 0.2779780658776635,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.096875,
|
|
"calibration/coverage@25%": 0.1640625,
|
|
"calibration/coverage@30%": 0.331640625,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.1923393379600086,
|
|
"calibration/mean_confidence": 0.4937273238161442,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 718.2,
|
|
"completions/max_terminated_length": 718.2,
|
|
"completions/mean_length": 204.9798828125,
|
|
"completions/mean_terminated_length": 205.0396270751953,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 86.4,
|
|
"epoch": 0.464,
|
|
"grad_norm": 0.0015804837457835674,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0012,
|
|
"num_tokens": 488876521.0,
|
|
"reward": 1.164589262008667,
|
|
"reward_std": 0.1838443696498871,
|
|
"rewards/accuracy_reward": 0.408984375,
|
|
"rewards/batch_coverage_0": 0.4630319595336914,
|
|
"rewards/batch_coverage_1": 0.4630319595336914,
|
|
"rewards/batch_coverage_10": 0.5065688967704773,
|
|
"rewards/batch_coverage_15": 0.5105774939060211,
|
|
"rewards/batch_coverage_20": 0.5268525779247284,
|
|
"rewards/batch_coverage_25": 0.5302481591701508,
|
|
"rewards/batch_coverage_5": 0.48869837522506715,
|
|
"rewards/brier_reward": 0.7661522626876831,
|
|
"rewards/confidence_uniqueness_reward": 0.8857262253761291,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.004124029399827122,
|
|
"rewards/frontier_ece_reward": 0.010478467494249345,
|
|
"rewards/frontier_entropy_batch_reward": -0.5479272603988647,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1063720703125,
|
|
"signal/accuracy_reward/group_std_mean": 0.14106076657772065,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05318603515625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05318603515625,
|
|
"signal/advantage_abs_mean": 0.1393085926771164,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1393085926771164,
|
|
"signal/advantage_pre_scale_std": 0.2106298953294754,
|
|
"signal/advantage_std": 0.2106298953294754,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.16174939572811126,
|
|
"signal/batch_coverage_0/group_std_mean": 0.2152931123971939,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.016174939833581446,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.016174939833581446,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.16174939572811126,
|
|
"signal/batch_coverage_1/group_std_mean": 0.2152931123971939,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.016174939833581446,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.016174939833581446,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.1717519074678421,
|
|
"signal/batch_coverage_10/group_std_mean": 0.22983591854572297,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01717519052326679,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.01717519052326679,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.17370556890964509,
|
|
"signal/batch_coverage_15/group_std_mean": 0.23222627639770507,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01737055741250515,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.01737055741250515,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.18301475644111634,
|
|
"signal/batch_coverage_20/group_std_mean": 0.24411162734031677,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01830147597938776,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.01830147597938776,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.18677313327789308,
|
|
"signal/batch_coverage_25/group_std_mean": 0.2487118124961853,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01867731362581253,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01867731362581253,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.16763336062431336,
|
|
"signal/batch_coverage_5/group_std_mean": 0.22398186922073365,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.016763335466384886,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.016763335466384886,
|
|
"signal/brier_reward/centered_abs_mean": 0.15969540774822236,
|
|
"signal/brier_reward/group_std_mean": 0.20515718460083007,
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015969540737569333,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015969540737569333,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.058282620459795,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07511355727910995,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.003125,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0058282620273530485,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0058282620273530485,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00264003137126565,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003911961335688829,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.300039425084833e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.300039425084833e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.035570315271615985,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.062593774497509,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003557031648233533,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003557031648233533,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3604434788227081,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4358489096164703,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.040625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03604434877634048,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03604434877634048,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.32654655608043387,
|
|
"calibration/batch_distribution_entropy": 0.7668590127145694,
|
|
"calibration/buffer_distribution_entropy": 0.7802321533968591,
|
|
"calibration/confidence_entropy": 0.2612017339736417,
|
|
"calibration/coverage@0%": 0.033203125,
|
|
"calibration/coverage@1%": 0.033203125,
|
|
"calibration/coverage@10%": 0.073046875,
|
|
"calibration/coverage@15%": 0.084765625,
|
|
"calibration/coverage@20%": 0.266796875,
|
|
"calibration/coverage@25%": 0.36484375,
|
|
"calibration/coverage@30%": 0.3953125,
|
|
"calibration/coverage@5%": 0.0609375,
|
|
"calibration/ece": 0.19328091315566903,
|
|
"calibration/mean_confidence": 0.4979834022668042,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 675.4,
|
|
"completions/max_terminated_length": 675.4,
|
|
"completions/mean_length": 208.08876953125,
|
|
"completions/mean_terminated_length": 208.19093627929686,
|
|
"completions/min_length": 38.8,
|
|
"completions/min_terminated_length": 96.2,
|
|
"epoch": 0.48,
|
|
"grad_norm": 0.00185799365863204,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 506055382.0,
|
|
"reward": 1.2049114227294921,
|
|
"reward_std": 0.17455832958221434,
|
|
"rewards/accuracy_reward": 0.46943359375,
|
|
"rewards/batch_coverage_0": 0.4818290829658508,
|
|
"rewards/batch_coverage_1": 0.4818290829658508,
|
|
"rewards/batch_coverage_10": 0.5297632455825806,
|
|
"rewards/batch_coverage_15": 0.5313080787658692,
|
|
"rewards/batch_coverage_20": 0.5360439538955688,
|
|
"rewards/batch_coverage_25": 0.5369215071201324,
|
|
"rewards/batch_coverage_5": 0.5070482075214386,
|
|
"rewards/brier_reward": 0.7813033342361451,
|
|
"rewards/confidence_uniqueness_reward": 0.8709333419799805,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.0030639852862805127,
|
|
"rewards/frontier_ece_reward": 0.015050551109015941,
|
|
"rewards/frontier_entropy_batch_reward": -0.5662830948829651,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.110113525390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.15058882236480714,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.55625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0550567626953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0550567626953125,
|
|
"signal/advantage_abs_mean": 0.13103221952915192,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13103221952915192,
|
|
"signal/advantage_pre_scale_std": 0.2030962586402893,
|
|
"signal/advantage_std": 0.2030962586402893,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.15617549121379853,
|
|
"signal/batch_coverage_0/group_std_mean": 0.20788954198360443,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01561754960566759,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.01561754960566759,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.15617549121379853,
|
|
"signal/batch_coverage_1/group_std_mean": 0.20788954198360443,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01561754960566759,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.01561754960566759,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.16319622099399567,
|
|
"signal/batch_coverage_10/group_std_mean": 0.21909156441688538,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01631962265819311,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.01631962265819311,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.16425377130508423,
|
|
"signal/batch_coverage_15/group_std_mean": 0.22029703259468078,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.016425377689301968,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.016425377689301968,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.16655297875404357,
|
|
"signal/batch_coverage_20/group_std_mean": 0.22349075376987457,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.016655297577381135,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.016655297577381135,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.16626424193382264,
|
|
"signal/batch_coverage_25/group_std_mean": 0.22361468076705932,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.016626424714922906,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.016626424714922906,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.16179881393909454,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2154046893119812,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.016179881058633327,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.016179881058633327,
|
|
"signal/brier_reward/centered_abs_mean": 0.15501052141189575,
|
|
"signal/brier_reward/group_std_mean": 0.19950452148914338,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015501052141189575,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015501052141189575,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06272363662719727,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08009307086467743,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006272363662719727,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006272363662719727,
|
|
"signal/format_reward/centered_abs_mean": 0.001300048828125,
|
|
"signal/format_reward/group_std_mean": 0.0031943732406944036,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006500244140625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006500244140625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021038135048002003,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002958751143887639,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6297669319319538e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6297669319319538e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.030137370526790618,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0553176075220108,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003013736940920353,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003013736940920353,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3504864811897278,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.42903093695640565,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.034375,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03504864946007728,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03504864946007728,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"eval_calibration/aurc": 0.4884404305130252,
|
|
"eval_calibration/batch_distribution_entropy": 0.7037518889147907,
|
|
"eval_calibration/buffer_distribution_entropy": 0.7807073277954115,
|
|
"eval_calibration/confidence_entropy": 0.24880659410045255,
|
|
"eval_calibration/coverage@0%": 0.046875,
|
|
"eval_calibration/coverage@1%": 0.046875,
|
|
"eval_calibration/coverage@10%": 0.078125,
|
|
"eval_calibration/coverage@15%": 0.078125,
|
|
"eval_calibration/coverage@20%": 0.09375,
|
|
"eval_calibration/coverage@25%": 0.125,
|
|
"eval_calibration/coverage@30%": 0.1328125,
|
|
"eval_calibration/coverage@5%": 0.046875,
|
|
"eval_calibration/ece": 0.2413991424156957,
|
|
"eval_calibration/mean_confidence": 0.46008607263806767,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 441.0,
|
|
"eval_completions/max_terminated_length": 441.0,
|
|
"eval_completions/mean_length": 210.2900733947754,
|
|
"eval_completions/mean_terminated_length": 210.2900733947754,
|
|
"eval_completions/min_length": 109.0,
|
|
"eval_completions/min_terminated_length": 109.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 506055382.0,
|
|
"eval_reward": 0.9580488353967667,
|
|
"eval_reward_std": 0.2786233425140381,
|
|
"eval_rewards/accuracy_reward": 0.3828125,
|
|
"eval_rewards/batch_coverage_0": 0.2982771582901478,
|
|
"eval_rewards/batch_coverage_1": 0.2982771582901478,
|
|
"eval_rewards/batch_coverage_10": 0.2973833717405796,
|
|
"eval_rewards/batch_coverage_15": 0.2931292913854122,
|
|
"eval_rewards/batch_coverage_20": 0.28491435572505,
|
|
"eval_rewards/batch_coverage_25": 0.26486651226878166,
|
|
"eval_rewards/batch_coverage_5": 0.2982771582901478,
|
|
"eval_rewards/brier_reward": 0.794169008731842,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8291015625,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0035186284221708775,
|
|
"eval_rewards/frontier_ece_reward": 0.008470115077216178,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 22.6013,
|
|
"eval_samples_per_second": 22.123,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4619140625,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.487694188952446,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23095703125,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23095703125,
|
|
"eval_signal/advantage_abs_mean": 0.23249849677085876,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.23249849677085876,
|
|
"eval_signal/advantage_pre_scale_std": 0.27607636898756027,
|
|
"eval_signal/advantage_std": 0.27607636898756027,
|
|
"eval_signal/batch_coverage_0/centered_abs_mean": 0.4355214685201645,
|
|
"eval_signal/batch_coverage_0/group_std_mean": 0.5041546821594238,
|
|
"eval_signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04355214722454548,
|
|
"eval_signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.04355214722454548,
|
|
"eval_signal/batch_coverage_1/centered_abs_mean": 0.4355214685201645,
|
|
"eval_signal/batch_coverage_1/group_std_mean": 0.5041546821594238,
|
|
"eval_signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04355214722454548,
|
|
"eval_signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.04355214722454548,
|
|
"eval_signal/batch_coverage_10/centered_abs_mean": 0.43344543874263763,
|
|
"eval_signal/batch_coverage_10/group_std_mean": 0.5016847252845764,
|
|
"eval_signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04334454517811537,
|
|
"eval_signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.04334454517811537,
|
|
"eval_signal/batch_coverage_15/centered_abs_mean": 0.42580925673246384,
|
|
"eval_signal/batch_coverage_15/group_std_mean": 0.4928712695837021,
|
|
"eval_signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.0425809258595109,
|
|
"eval_signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.0425809258595109,
|
|
"eval_signal/batch_coverage_20/centered_abs_mean": 0.4136809632182121,
|
|
"eval_signal/batch_coverage_20/group_std_mean": 0.4787442535161972,
|
|
"eval_signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04136809799820185,
|
|
"eval_signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.04136809799820185,
|
|
"eval_signal/batch_coverage_25/centered_abs_mean": 0.3728942573070526,
|
|
"eval_signal/batch_coverage_25/group_std_mean": 0.43325306475162506,
|
|
"eval_signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03728942573070526,
|
|
"eval_signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.03728942573070526,
|
|
"eval_signal/batch_coverage_5/centered_abs_mean": 0.4355214685201645,
|
|
"eval_signal/batch_coverage_5/group_std_mean": 0.5041546821594238,
|
|
"eval_signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04355214722454548,
|
|
"eval_signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.04355214722454548,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.27639148384332657,
|
|
"eval_signal/brier_reward/group_std_mean": 0.34276602417230606,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027639148756861687,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.027639148756861687,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0998077392578125,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.12010791897773743,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009980774368159473,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009980774368159473,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.00420642871176824,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.007171055534854531,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.25803607160924e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.25803607160924e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.03641515225172043,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.05970622505992651,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036415152717381716,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036415152717381716,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.177,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calibration/aurc": 0.38867555529956505,
|
|
"calibration/batch_distribution_entropy": 0.744077371845278,
|
|
"calibration/buffer_distribution_entropy": 0.7789951146287379,
|
|
"calibration/confidence_entropy": 0.2551266385058731,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.16171875,
|
|
"calibration/coverage@15%": 0.177734375,
|
|
"calibration/coverage@20%": 0.2109375,
|
|
"calibration/coverage@25%": 0.240625,
|
|
"calibration/coverage@30%": 0.301171875,
|
|
"calibration/coverage@5%": 0.1078125,
|
|
"calibration/ece": 0.18511672998881415,
|
|
"calibration/mean_confidence": 0.5120126299693563,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 588.2,
|
|
"completions/max_terminated_length": 588.2,
|
|
"completions/mean_length": 208.57919921875,
|
|
"completions/mean_terminated_length": 208.62011108398437,
|
|
"completions/min_length": 75.6,
|
|
"completions/min_terminated_length": 96.0,
|
|
"epoch": 0.496,
|
|
"grad_norm": 0.0018345050048083067,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 523499073.0,
|
|
"reward": 1.2330967903137207,
|
|
"reward_std": 0.1739658534526825,
|
|
"rewards/accuracy_reward": 0.5115234375,
|
|
"rewards/batch_coverage_0": 0.4806299567222595,
|
|
"rewards/batch_coverage_1": 0.4806299567222595,
|
|
"rewards/batch_coverage_10": 0.5262403607368469,
|
|
"rewards/batch_coverage_15": 0.5340194821357727,
|
|
"rewards/batch_coverage_20": 0.548810887336731,
|
|
"rewards/batch_coverage_25": 0.5515802383422852,
|
|
"rewards/batch_coverage_5": 0.5057286143302917,
|
|
"rewards/brier_reward": 0.7810137271881104,
|
|
"rewards/confidence_uniqueness_reward": 0.8951348662376404,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0029973339987918735,
|
|
"rewards/frontier_ece_reward": 0.01581182572990656,
|
|
"rewards/frontier_entropy_batch_reward": -0.5444094181060791,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10760498046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.13941818177700044,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.053802490234375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.053802490234375,
|
|
"signal/advantage_abs_mean": 0.13376112133264542,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13376112133264542,
|
|
"signal/advantage_pre_scale_std": 0.20336721539497377,
|
|
"signal/advantage_std": 0.20336721539497377,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.16944837868213652,
|
|
"signal/batch_coverage_0/group_std_mean": 0.2214631974697113,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.016944839060306548,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.016944839060306548,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.16944837868213652,
|
|
"signal/batch_coverage_1/group_std_mean": 0.2214631974697113,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.016944839060306548,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.016944839060306548,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.17917440831661224,
|
|
"signal/batch_coverage_10/group_std_mean": 0.23413580656051636,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.017917441949248315,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.017917441949248315,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.17658520340919495,
|
|
"signal/batch_coverage_15/group_std_mean": 0.23149906396865844,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.017658520489931107,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.017658520489931107,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.1827937513589859,
|
|
"signal/batch_coverage_20/group_std_mean": 0.24057833552360536,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01827937588095665,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.01827937588095665,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.1835138112306595,
|
|
"signal/batch_coverage_25/group_std_mean": 0.24196174442768098,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01835138164460659,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01835138164460659,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1757124364376068,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2290509968996048,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.017571244016289712,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.017571244016289712,
|
|
"signal/brier_reward/centered_abs_mean": 0.1527266710996628,
|
|
"signal/brier_reward/group_std_mean": 0.1937905639410019,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015272667445242406,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015272667445242406,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.053154267370700836,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06698481962084771,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005315426737070084,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005315426737070084,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814434766769,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002370417304337025,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003476083744317293,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.963021761388518e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.963021761388518e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.025968721136450768,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04501113593578339,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025968722999095916,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025968722999095916,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3555388033390045,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.43377270102500914,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035553880780935285,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035553880780935285,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3188977987264959,
|
|
"calibration/batch_distribution_entropy": 0.7773323065899319,
|
|
"calibration/buffer_distribution_entropy": 0.7764884772212833,
|
|
"calibration/confidence_entropy": 0.2696159268208872,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.186328125,
|
|
"calibration/coverage@15%": 0.290234375,
|
|
"calibration/coverage@20%": 0.344140625,
|
|
"calibration/coverage@25%": 0.402734375,
|
|
"calibration/coverage@30%": 0.44765625,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.17410244165023042,
|
|
"calibration/mean_confidence": 0.5204375945705545,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 648.2,
|
|
"completions/max_terminated_length": 648.2,
|
|
"completions/mean_length": 203.21279296875,
|
|
"completions/mean_terminated_length": 203.2727264404297,
|
|
"completions/min_length": 36.8,
|
|
"completions/min_terminated_length": 95.4,
|
|
"epoch": 0.512,
|
|
"grad_norm": 0.001649328856728971,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 540725636.0,
|
|
"reward": 1.2527109146118165,
|
|
"reward_std": 0.175427907705307,
|
|
"rewards/accuracy_reward": 0.51953125,
|
|
"rewards/batch_coverage_0": 0.49197604656219485,
|
|
"rewards/batch_coverage_1": 0.49197604656219485,
|
|
"rewards/batch_coverage_10": 0.5528481423854827,
|
|
"rewards/batch_coverage_15": 0.5631466746330261,
|
|
"rewards/batch_coverage_20": 0.5689069867134094,
|
|
"rewards/batch_coverage_25": 0.5744830250740052,
|
|
"rewards/batch_coverage_5": 0.5323562324047089,
|
|
"rewards/brier_reward": 0.8082866907119751,
|
|
"rewards/confidence_uniqueness_reward": 0.8861725449562072,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.002741323783993721,
|
|
"rewards/frontier_ece_reward": 0.01900950875133276,
|
|
"rewards/frontier_entropy_batch_reward": -0.556925094127655,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.11180419921875,
|
|
"signal/accuracy_reward/group_std_mean": 0.14193963408470153,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.055902099609375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.055902099609375,
|
|
"signal/advantage_abs_mean": 0.13240396827459336,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13240396827459336,
|
|
"signal/advantage_pre_scale_std": 0.20585049986839293,
|
|
"signal/advantage_std": 0.20585049986839293,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.15534441769123078,
|
|
"signal/batch_coverage_0/group_std_mean": 0.20446575582027435,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01553444191813469,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.01553444191813469,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.15534441769123078,
|
|
"signal/batch_coverage_1/group_std_mean": 0.20446575582027435,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01553444191813469,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.01553444191813469,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.16945621967315674,
|
|
"signal/batch_coverage_10/group_std_mean": 0.22647275328636168,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.016945621743798257,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.016945621743798257,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.1726034849882126,
|
|
"signal/batch_coverage_15/group_std_mean": 0.23102477192878723,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.017260348610579967,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.017260348610579967,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.17522413432598113,
|
|
"signal/batch_coverage_20/group_std_mean": 0.234201380610466,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.017522412911057474,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.017522412911057474,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.1778273493051529,
|
|
"signal/batch_coverage_25/group_std_mean": 0.23738239705562592,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.0177827350795269,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.0177827350795269,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1642172545194626,
|
|
"signal/batch_coverage_5/group_std_mean": 0.21815676391124725,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.016421726159751415,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.016421726159751415,
|
|
"signal/brier_reward/centered_abs_mean": 0.14843180775642395,
|
|
"signal/brier_reward/group_std_mean": 0.19130195081233978,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01484318058937788,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01484318058937788,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06038021594285965,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07598417848348618,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006038021761924028,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006038021761924028,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025649062590673566,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0038140499033033847,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.206132823834196e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.206132823834196e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.023592386022210123,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03882751725614071,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023592386161908506,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023592386161908506,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36873559951782225,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.44527164101600647,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.036873559653759005,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.036873559653759005,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22378652873622512,
|
|
"calibration/batch_distribution_entropy": 0.7426672349348944,
|
|
"calibration/buffer_distribution_entropy": 0.7743966444436385,
|
|
"calibration/confidence_entropy": 0.23855738268617682,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.30232383578431377,
|
|
"calibration/coverage@15%": 0.38014705882352945,
|
|
"calibration/coverage@20%": 0.466141237745098,
|
|
"calibration/coverage@25%": 0.611890318627451,
|
|
"calibration/coverage@30%": 0.698265931372549,
|
|
"calibration/coverage@5%": 0.20137101715686273,
|
|
"calibration/ece": 0.1219204127819263,
|
|
"calibration/mean_confidence": 0.550545181805313,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00068359375,
|
|
"completions/max_length": 593.4,
|
|
"completions/max_terminated_length": 593.4,
|
|
"completions/mean_length": 197.36845703125,
|
|
"completions/mean_terminated_length": 197.5027648925781,
|
|
"completions/min_length": 39.0,
|
|
"completions/min_terminated_length": 92.4,
|
|
"epoch": 0.528,
|
|
"grad_norm": 0.0024422414135187864,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 557776225.0,
|
|
"reward": 1.2554330110549927,
|
|
"reward_std": 0.16490131318569184,
|
|
"rewards/accuracy_reward": 0.5,
|
|
"rewards/batch_coverage_0": 0.5215178728103638,
|
|
"rewards/batch_coverage_1": 0.5215178728103638,
|
|
"rewards/batch_coverage_10": 0.5667154312133789,
|
|
"rewards/batch_coverage_15": 0.5763909935951232,
|
|
"rewards/batch_coverage_20": 0.5892943024635315,
|
|
"rewards/batch_coverage_25": 0.5948551058769226,
|
|
"rewards/batch_coverage_5": 0.544804036617279,
|
|
"rewards/brier_reward": 0.8188990354537964,
|
|
"rewards/confidence_uniqueness_reward": 0.8539436340332032,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.002834248635917902,
|
|
"rewards/frontier_ece_reward": 0.016052440367639066,
|
|
"rewards/frontier_entropy_batch_reward": -0.5458881616592407,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.11424560546875,
|
|
"signal/accuracy_reward/group_std_mean": 0.14434780478477477,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.057122802734375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.057122802734375,
|
|
"signal/advantage_abs_mean": 0.12325243949890137,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12325243949890137,
|
|
"signal/advantage_pre_scale_std": 0.19943318963050843,
|
|
"signal/advantage_std": 0.19943318963050843,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.15505726933479308,
|
|
"signal/batch_coverage_0/group_std_mean": 0.20619003772735595,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.015505727007985115,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.015505727007985115,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.15505726933479308,
|
|
"signal/batch_coverage_1/group_std_mean": 0.20619003772735595,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.015505727007985115,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.015505727007985115,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.1644124746322632,
|
|
"signal/batch_coverage_10/group_std_mean": 0.21964193284511566,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.016441247425973415,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.016441247425973415,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.1649382770061493,
|
|
"signal/batch_coverage_15/group_std_mean": 0.22163851857185363,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.016493828408420085,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.016493828408420085,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.1724429965019226,
|
|
"signal/batch_coverage_20/group_std_mean": 0.23197215795516968,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.017244300059974193,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.017244300059974193,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.17294214367866517,
|
|
"signal/batch_coverage_25/group_std_mean": 0.23340463042259216,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01729421429336071,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01729421429336071,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1575198918581009,
|
|
"signal/batch_coverage_5/group_std_mean": 0.20969080328941345,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01575198918581009,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.01575198918581009,
|
|
"signal/brier_reward/centered_abs_mean": 0.13578990399837493,
|
|
"signal/brier_reward/group_std_mean": 0.1776987671852112,
|
|
"signal/brier_reward/group_zero_std_frac": 0.009375,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01357899084687233,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01357899084687233,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07442506104707718,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09471029192209243,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.009375,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007442506123334169,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007442506123334169,
|
|
"signal/format_reward/centered_abs_mean": 0.001251220703125,
|
|
"signal/format_reward/group_std_mean": 0.0027073150966316463,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006256103515625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006256103515625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002246782067231834,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0032120409421622755,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8084776931791567e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8084776931791567e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.016834354028105736,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02396107092499733,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016834354726597666,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016834354726597666,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34070093631744386,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4177111804485321,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.053125,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034070093929767606,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034070093929767606,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23778958238290318,
|
|
"calibration/batch_distribution_entropy": 0.6721844309871134,
|
|
"calibration/buffer_distribution_entropy": 0.7698408958916128,
|
|
"calibration/confidence_entropy": 0.20618978053254292,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.18515625,
|
|
"calibration/coverage@15%": 0.325390625,
|
|
"calibration/coverage@20%": 0.545703125,
|
|
"calibration/coverage@25%": 0.658984375,
|
|
"calibration/coverage@30%": 0.73359375,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.12480381607492423,
|
|
"calibration/mean_confidence": 0.5956984779267076,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 609.0,
|
|
"completions/max_terminated_length": 609.0,
|
|
"completions/mean_length": 192.44296875,
|
|
"completions/mean_terminated_length": 192.479736328125,
|
|
"completions/min_length": 54.0,
|
|
"completions/min_terminated_length": 92.8,
|
|
"epoch": 0.544,
|
|
"grad_norm": 0.002244866918772459,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 574910425.0,
|
|
"reward": 1.2379089832305907,
|
|
"reward_std": 0.17852184176445007,
|
|
"rewards/accuracy_reward": 0.5314453125,
|
|
"rewards/batch_coverage_0": 0.45688796043395996,
|
|
"rewards/batch_coverage_1": 0.45688796043395996,
|
|
"rewards/batch_coverage_10": 0.5334352254867554,
|
|
"rewards/batch_coverage_15": 0.5431413769721984,
|
|
"rewards/batch_coverage_20": 0.5495520114898682,
|
|
"rewards/batch_coverage_25": 0.5513846039772033,
|
|
"rewards/batch_coverage_5": 0.49837467074394226,
|
|
"rewards/brier_reward": 0.7937116980552673,
|
|
"rewards/confidence_uniqueness_reward": 0.8674801826477051,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0031372241675853728,
|
|
"rewards/frontier_ece_reward": 0.015439392626285553,
|
|
"rewards/frontier_entropy_batch_reward": -0.5430628538131714,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.11666259765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.15614081025123597,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.546875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.058331298828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.058331298828125,
|
|
"signal/advantage_abs_mean": 0.1334821343421936,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1334821343421936,
|
|
"signal/advantage_pre_scale_std": 0.2087447464466095,
|
|
"signal/advantage_std": 0.2087447464466095,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.15778434574604033,
|
|
"signal/batch_coverage_0/group_std_mean": 0.20611576437950135,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01577843427658081,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.01577843427658081,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.15778434574604033,
|
|
"signal/batch_coverage_1/group_std_mean": 0.20611576437950135,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01577843427658081,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.01577843427658081,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.1730392426252365,
|
|
"signal/batch_coverage_10/group_std_mean": 0.2319386124610901,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.017303923889994622,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.017303923889994622,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.17587968111038207,
|
|
"signal/batch_coverage_15/group_std_mean": 0.236191463470459,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01758796814829111,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.01758796814829111,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.1733997642993927,
|
|
"signal/batch_coverage_20/group_std_mean": 0.23472839891910552,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.017339977063238622,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.017339977063238622,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.1751231223344803,
|
|
"signal/batch_coverage_25/group_std_mean": 0.2371717870235443,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01751231253147125,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01751231253147125,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.167154985666275,
|
|
"signal/batch_coverage_5/group_std_mean": 0.22095239162445068,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.016715498454868794,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.016715498454868794,
|
|
"signal/brier_reward/centered_abs_mean": 0.14963855743408203,
|
|
"signal/brier_reward/group_std_mean": 0.1929429590702057,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014963855780661107,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014963855780661107,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07363773882389069,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09168784022331238,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007363773882389069,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007363773882389069,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027165337465703487,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003975105192512274,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.395667154109106e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.395667154109106e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01730995737016201,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02390244007110596,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017309957882389426,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017309957882389426,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34708802700042723,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.42616881132125856,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.025,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03470880389213562,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03470880389213562,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2520918647908457,
|
|
"calibration/batch_distribution_entropy": 0.7586509591853903,
|
|
"calibration/buffer_distribution_entropy": 0.7642095306255758,
|
|
"calibration/confidence_entropy": 0.2576317244833691,
|
|
"calibration/coverage@0%": 0.098046875,
|
|
"calibration/coverage@1%": 0.131640625,
|
|
"calibration/coverage@10%": 0.279296875,
|
|
"calibration/coverage@15%": 0.339453125,
|
|
"calibration/coverage@20%": 0.428125,
|
|
"calibration/coverage@25%": 0.510546875,
|
|
"calibration/coverage@30%": 0.665234375,
|
|
"calibration/coverage@5%": 0.171875,
|
|
"calibration/ece": 0.11032255719491908,
|
|
"calibration/mean_confidence": 0.5397637404746065,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 667.0,
|
|
"completions/max_terminated_length": 667.0,
|
|
"completions/mean_length": 191.084765625,
|
|
"completions/mean_terminated_length": 191.14002685546876,
|
|
"completions/min_length": 37.4,
|
|
"completions/min_terminated_length": 89.4,
|
|
"epoch": 0.56,
|
|
"grad_norm": 0.001731444033794105,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0,
|
|
"num_tokens": 591688541.0,
|
|
"reward": 1.2657613039016724,
|
|
"reward_std": 0.15425851047039033,
|
|
"rewards/accuracy_reward": 0.49580078125,
|
|
"rewards/batch_coverage_0": 0.5337414622306824,
|
|
"rewards/batch_coverage_1": 0.5337414622306824,
|
|
"rewards/batch_coverage_10": 0.5779079556465149,
|
|
"rewards/batch_coverage_15": 0.5824267387390136,
|
|
"rewards/batch_coverage_20": 0.5927862644195556,
|
|
"rewards/batch_coverage_25": 0.5977591753005982,
|
|
"rewards/batch_coverage_5": 0.5679870009422302,
|
|
"rewards/brier_reward": 0.8221750140190125,
|
|
"rewards/confidence_uniqueness_reward": 0.8881647944450378,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.002653585933148861,
|
|
"rewards/frontier_ece_reward": 0.0161499110981822,
|
|
"rewards/frontier_entropy_batch_reward": -0.5319459795951843,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.085516357421875,
|
|
"signal/accuracy_reward/group_std_mean": 0.11478217989206314,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0427581787109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0427581787109375,
|
|
"signal/advantage_abs_mean": 0.11694410741329193,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11694410741329193,
|
|
"signal/advantage_pre_scale_std": 0.18468815684318543,
|
|
"signal/advantage_std": 0.18468815684318543,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.14939452409744264,
|
|
"signal/batch_coverage_0/group_std_mean": 0.19393481314182281,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.014939452335238457,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.014939452335238457,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.14939452409744264,
|
|
"signal/batch_coverage_1/group_std_mean": 0.19393481314182281,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.014939452335238457,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.014939452335238457,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.1572708457708359,
|
|
"signal/batch_coverage_10/group_std_mean": 0.20482723116874696,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.015727085433900355,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.015727085433900355,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.15779331028461457,
|
|
"signal/batch_coverage_15/group_std_mean": 0.2049179792404175,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.015779331885278226,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.015779331885278226,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.160746768116951,
|
|
"signal/batch_coverage_20/group_std_mean": 0.20988258421421052,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.016074676625430585,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.016074676625430585,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.16449660658836365,
|
|
"signal/batch_coverage_25/group_std_mean": 0.2150314450263977,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.016449661180377006,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.016449661180377006,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.15509903728961943,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2023308277130127,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.015509903617203235,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.015509903617203235,
|
|
"signal/brier_reward/centered_abs_mean": 0.12762503176927567,
|
|
"signal/brier_reward/group_std_mean": 0.16494102776050568,
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012762503698468208,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012762503698468208,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05460420995950699,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0708104282617569,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.003125,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005460420995950699,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005460420995950699,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001886643934994936,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002751070214435458,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.358304955123458e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.358304955123458e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.016662517562508583,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.023172761127352715,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016662518493831158,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016662518493831158,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3374159514904022,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41330012679100037,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.05,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03374159559607506,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03374159559607506,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27164267924301744,
|
|
"calibration/batch_distribution_entropy": 0.7208326409001041,
|
|
"calibration/buffer_distribution_entropy": 0.7632079280659739,
|
|
"calibration/confidence_entropy": 0.23629183211104654,
|
|
"calibration/coverage@0%": 0.042578125,
|
|
"calibration/coverage@1%": 0.042578125,
|
|
"calibration/coverage@10%": 0.171484375,
|
|
"calibration/coverage@15%": 0.3296875,
|
|
"calibration/coverage@20%": 0.406640625,
|
|
"calibration/coverage@25%": 0.487890625,
|
|
"calibration/coverage@30%": 0.565234375,
|
|
"calibration/coverage@5%": 0.085546875,
|
|
"calibration/ece": 0.14933425701114764,
|
|
"calibration/mean_confidence": 0.5480320473725643,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 477.2,
|
|
"completions/max_terminated_length": 477.2,
|
|
"completions/mean_length": 192.9146484375,
|
|
"completions/mean_terminated_length": 192.97347412109374,
|
|
"completions/min_length": 56.4,
|
|
"completions/min_terminated_length": 94.4,
|
|
"epoch": 0.576,
|
|
"grad_norm": 0.0015635039890184999,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0008,
|
|
"num_tokens": 608850611.0,
|
|
"reward": 1.2522411346435547,
|
|
"reward_std": 0.15446202158927919,
|
|
"rewards/accuracy_reward": 0.4966796875,
|
|
"rewards/batch_coverage_0": 0.525709193944931,
|
|
"rewards/batch_coverage_1": 0.525709193944931,
|
|
"rewards/batch_coverage_10": 0.5797426462173462,
|
|
"rewards/batch_coverage_15": 0.5850045204162597,
|
|
"rewards/batch_coverage_20": 0.5929110765457153,
|
|
"rewards/batch_coverage_25": 0.5955699563026429,
|
|
"rewards/batch_coverage_5": 0.5522035241127015,
|
|
"rewards/brier_reward": 0.7857939243316651,
|
|
"rewards/confidence_uniqueness_reward": 0.8544246196746826,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.003201226470991969,
|
|
"rewards/frontier_ece_reward": 0.013790114596486092,
|
|
"rewards/frontier_entropy_batch_reward": -0.5694931745529175,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07906494140625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10888843834400178,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039532470703125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.039532470703125,
|
|
"signal/advantage_abs_mean": 0.11520133465528488,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11520133465528488,
|
|
"signal/advantage_pre_scale_std": 0.19038280248641967,
|
|
"signal/advantage_std": 0.19038280248641967,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.1342694342136383,
|
|
"signal/batch_coverage_0/group_std_mean": 0.1771334409713745,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013426943868398666,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.013426943868398666,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.1342694342136383,
|
|
"signal/batch_coverage_1/group_std_mean": 0.1771334409713745,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013426943868398666,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.013426943868398666,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.14773536026477813,
|
|
"signal/batch_coverage_10/group_std_mean": 0.19756182432174682,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.014773536287248135,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.014773536287248135,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.15087927877902985,
|
|
"signal/batch_coverage_15/group_std_mean": 0.20164599716663362,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.015087928250432014,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.015087928250432014,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.1539299249649048,
|
|
"signal/batch_coverage_20/group_std_mean": 0.20657563507556914,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01539299227297306,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.01539299227297306,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.15464832186698912,
|
|
"signal/batch_coverage_25/group_std_mean": 0.20785588324069976,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.015464832447469235,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.015464832447469235,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.13860245048999786,
|
|
"signal/batch_coverage_5/group_std_mean": 0.18345766067504882,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013860245980322362,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.013860245980322362,
|
|
"signal/brier_reward/centered_abs_mean": 0.1274334356188774,
|
|
"signal/brier_reward/group_std_mean": 0.16567680537700652,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012743343599140644,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012743343599140644,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07045024782419204,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09113750010728836,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0070450249128043655,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0070450249128043655,
|
|
"signal/format_reward/centered_abs_mean": 0.00074462890625,
|
|
"signal/format_reward/group_std_mean": 0.0018734002020210027,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021882928907871247,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0032797419466078282,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7353662517271005e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7353662517271005e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.015608221106231213,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.022461001202464102,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015608221292495728,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015608221292495728,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32071712613105774,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3963967502117157,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03207171261310578,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03207171261310578,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2978480243770719,
|
|
"calibration/batch_distribution_entropy": 0.6714511049146308,
|
|
"calibration/buffer_distribution_entropy": 0.7618800137320163,
|
|
"calibration/confidence_entropy": 0.20542300175159228,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.2408054060665362,
|
|
"calibration/coverage@15%": 0.3275654354207436,
|
|
"calibration/coverage@20%": 0.4197965080724071,
|
|
"calibration/coverage@25%": 0.5444311093444227,
|
|
"calibration/coverage@30%": 0.5936727923189824,
|
|
"calibration/coverage@5%": 0.187620780332681,
|
|
"calibration/ece": 0.16323717985124106,
|
|
"calibration/mean_confidence": 0.5272898830970567,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 527.4,
|
|
"completions/max_terminated_length": 527.4,
|
|
"completions/mean_length": 193.88447265625,
|
|
"completions/mean_terminated_length": 193.92267150878905,
|
|
"completions/min_length": 57.0,
|
|
"completions/min_terminated_length": 92.6,
|
|
"epoch": 0.592,
|
|
"grad_norm": 0.0016659012762829661,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0011,
|
|
"num_tokens": 626003700.0,
|
|
"reward": 1.2439999103546142,
|
|
"reward_std": 0.15648065507411957,
|
|
"rewards/accuracy_reward": 0.48837890625,
|
|
"rewards/batch_coverage_0": 0.5292935132980346,
|
|
"rewards/batch_coverage_1": 0.5292935132980346,
|
|
"rewards/batch_coverage_10": 0.5772804141044616,
|
|
"rewards/batch_coverage_15": 0.5800390005111694,
|
|
"rewards/batch_coverage_20": 0.5900392889976501,
|
|
"rewards/batch_coverage_25": 0.5927664875984192,
|
|
"rewards/batch_coverage_5": 0.5604206562042237,
|
|
"rewards/brier_reward": 0.7853564500808716,
|
|
"rewards/confidence_uniqueness_reward": 0.833574116230011,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.003251887438818812,
|
|
"rewards/frontier_ece_reward": 0.013345247507095337,
|
|
"rewards/frontier_entropy_batch_reward": -0.5919211268424988,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.088482666015625,
|
|
"signal/accuracy_reward/group_std_mean": 0.12249124199151992,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0442413330078125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0442413330078125,
|
|
"signal/advantage_abs_mean": 0.11981900781393051,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11981900781393051,
|
|
"signal/advantage_pre_scale_std": 0.1967749923467636,
|
|
"signal/advantage_std": 0.1967749923467636,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.14945873618125916,
|
|
"signal/batch_coverage_0/group_std_mean": 0.19603240489959717,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.014945873618125915,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.014945873618125915,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.14945873618125916,
|
|
"signal/batch_coverage_1/group_std_mean": 0.19603240489959717,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.014945873618125915,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.014945873618125915,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.15743134617805482,
|
|
"signal/batch_coverage_10/group_std_mean": 0.20761019885540008,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.015743134170770647,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.015743134170770647,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.1575777143239975,
|
|
"signal/batch_coverage_15/group_std_mean": 0.20775634944438934,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01575777158141136,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.01575777158141136,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.1627923756837845,
|
|
"signal/batch_coverage_20/group_std_mean": 0.21458121240139008,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.016279237903654577,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.016279237903654577,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.16482252776622772,
|
|
"signal/batch_coverage_25/group_std_mean": 0.21744664311408995,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.016482253558933735,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.016482253558933735,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.15643575489521028,
|
|
"signal/batch_coverage_5/group_std_mean": 0.20531278550624849,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.015643575973808766,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.015643575973808766,
|
|
"signal/brier_reward/centered_abs_mean": 0.13729436993598937,
|
|
"signal/brier_reward/group_std_mean": 0.17434926629066466,
|
|
"signal/brier_reward/group_zero_std_frac": 0.009375,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013729437068104745,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013729437068104745,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08399370908737183,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10645784288644791,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.009375,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00839937087148428,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00839937087148428,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002287204097956419,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003136986354365945,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8590050715138204e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8590050715138204e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.015637117996811868,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.022227041050791742,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015637118136510252,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015637118136510252,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3143742084503174,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39200871586799624,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03143742233514786,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03143742233514786,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24269278070177402,
|
|
"calibration/batch_distribution_entropy": 0.6160343742853823,
|
|
"calibration/buffer_distribution_entropy": 0.7586620888676098,
|
|
"calibration/confidence_entropy": 0.18859843739186988,
|
|
"calibration/coverage@0%": 0.054296875,
|
|
"calibration/coverage@1%": 0.054296875,
|
|
"calibration/coverage@10%": 0.3044653799019608,
|
|
"calibration/coverage@15%": 0.438109681372549,
|
|
"calibration/coverage@20%": 0.5072886029411764,
|
|
"calibration/coverage@25%": 0.5827129289215687,
|
|
"calibration/coverage@30%": 0.6284742647058824,
|
|
"calibration/coverage@5%": 0.105859375,
|
|
"calibration/ece": 0.1277596892426967,
|
|
"calibration/mean_confidence": 0.4764249202730988,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 485.4,
|
|
"completions/max_terminated_length": 485.4,
|
|
"completions/mean_length": 191.13359375,
|
|
"completions/mean_terminated_length": 191.20944213867188,
|
|
"completions/min_length": 36.4,
|
|
"completions/min_terminated_length": 94.8,
|
|
"epoch": 0.608,
|
|
"grad_norm": 0.0018507551867514849,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 642960396.0,
|
|
"reward": 1.2837910652160645,
|
|
"reward_std": 0.13548575043678285,
|
|
"rewards/accuracy_reward": 0.4826171875,
|
|
"rewards/batch_coverage_0": 0.5999539256095886,
|
|
"rewards/batch_coverage_1": 0.5999539256095886,
|
|
"rewards/batch_coverage_10": 0.6299304246902466,
|
|
"rewards/batch_coverage_15": 0.6342156767845154,
|
|
"rewards/batch_coverage_20": 0.641483461856842,
|
|
"rewards/batch_coverage_25": 0.645661735534668,
|
|
"rewards/batch_coverage_5": 0.6170846819877625,
|
|
"rewards/brier_reward": 0.8336545467376709,
|
|
"rewards/confidence_uniqueness_reward": 0.8381247282028198,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.0023502955213189126,
|
|
"rewards/frontier_ece_reward": 0.016457681730389594,
|
|
"rewards/frontier_entropy_batch_reward": -0.6294492602348327,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0827392578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.10903512537479401,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.690625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04136962890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04136962890625,
|
|
"signal/advantage_abs_mean": 0.10070695728063583,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10070695728063583,
|
|
"signal/advantage_pre_scale_std": 0.17648605406284332,
|
|
"signal/advantage_std": 0.17648605406284332,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.12757147401571273,
|
|
"signal/batch_coverage_0/group_std_mean": 0.17173427641391753,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.021875,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01275714747607708,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.01275714747607708,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.12757147401571273,
|
|
"signal/batch_coverage_1/group_std_mean": 0.17173427641391753,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.021875,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01275714747607708,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.01275714747607708,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.13172535002231597,
|
|
"signal/batch_coverage_10/group_std_mean": 0.17821555435657502,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.021875,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01317253541201353,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.01317253541201353,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.13351904302835466,
|
|
"signal/batch_coverage_15/group_std_mean": 0.18080573976039888,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.021875,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013351904228329659,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.013351904228329659,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.13711758852005004,
|
|
"signal/batch_coverage_20/group_std_mean": 0.18629833459854125,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.021875,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013711759075522422,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.013711759075522422,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.1425417572259903,
|
|
"signal/batch_coverage_25/group_std_mean": 0.19299682080745698,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.021875,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.014254176057875156,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.014254176057875156,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1316314160823822,
|
|
"signal/batch_coverage_5/group_std_mean": 0.17660426199436188,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.021875,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0131631413474679,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0131631413474679,
|
|
"signal/brier_reward/centered_abs_mean": 0.1148117184638977,
|
|
"signal/brier_reward/group_std_mean": 0.15035415887832643,
|
|
"signal/brier_reward/group_zero_std_frac": 0.021875,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01148117184638977,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01148117184638977,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07413349598646164,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0926214724779129,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.021875,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0074133495800197124,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0074133495800197124,
|
|
"signal/format_reward/centered_abs_mean": 0.00074462890625,
|
|
"signal/format_reward/group_std_mean": 0.0018734002020210027,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014779501361772418,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002129552699625492,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8474376884114462e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8474376884114462e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014195657894015312,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.020859728381037713,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014195657800883054,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014195657800883054,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3070155918598175,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38327420949935914,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0307015597820282,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0307015597820282,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22339887578048012,
|
|
"calibration/batch_distribution_entropy": 0.7031810388951124,
|
|
"calibration/buffer_distribution_entropy": 0.7558727835947326,
|
|
"calibration/confidence_entropy": 0.23025108654477772,
|
|
"calibration/coverage@0%": 0.0171875,
|
|
"calibration/coverage@1%": 0.0171875,
|
|
"calibration/coverage@10%": 0.3052164872798434,
|
|
"calibration/coverage@15%": 0.42831993028375737,
|
|
"calibration/coverage@20%": 0.47638973825831704,
|
|
"calibration/coverage@25%": 0.6150952482876713,
|
|
"calibration/coverage@30%": 0.6717610995596869,
|
|
"calibration/coverage@5%": 0.17548235689823874,
|
|
"calibration/ece": 0.13616244771925584,
|
|
"calibration/mean_confidence": 0.5098264604818323,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 504.2,
|
|
"completions/max_terminated_length": 504.2,
|
|
"completions/mean_length": 196.30712890625,
|
|
"completions/mean_terminated_length": 196.3457824707031,
|
|
"completions/min_length": 57.4,
|
|
"completions/min_terminated_length": 92.6,
|
|
"epoch": 0.624,
|
|
"grad_norm": 0.0016954562161117792,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 660314485.0,
|
|
"reward": 1.278175687789917,
|
|
"reward_std": 0.1583867758512497,
|
|
"rewards/accuracy_reward": 0.507421875,
|
|
"rewards/batch_coverage_0": 0.5575575470924378,
|
|
"rewards/batch_coverage_1": 0.5575575470924378,
|
|
"rewards/batch_coverage_10": 0.596357774734497,
|
|
"rewards/batch_coverage_15": 0.6038527488708496,
|
|
"rewards/batch_coverage_20": 0.6089828252792359,
|
|
"rewards/batch_coverage_25": 0.6120626330375671,
|
|
"rewards/batch_coverage_5": 0.5829194903373718,
|
|
"rewards/brier_reward": 0.8186290860176086,
|
|
"rewards/confidence_uniqueness_reward": 0.8801854848861694,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0023375329561531543,
|
|
"rewards/frontier_ece_reward": 0.015244904533028602,
|
|
"rewards/frontier_entropy_batch_reward": -0.5869454622268677,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.084375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1125134527683258,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.675,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0421875,
|
|
"signal/advantage_abs_mean": 0.1217761904001236,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1217761904001236,
|
|
"signal/advantage_pre_scale_std": 0.19559427797794343,
|
|
"signal/advantage_std": 0.19559427797794343,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.15701129138469697,
|
|
"signal/batch_coverage_0/group_std_mean": 0.20394995510578157,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.015701129287481307,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.015701129287481307,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.15701129138469697,
|
|
"signal/batch_coverage_1/group_std_mean": 0.20394995510578157,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.015701129287481307,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.015701129287481307,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.16522761583328247,
|
|
"signal/batch_coverage_10/group_std_mean": 0.21434899270534516,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.016522761806845664,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.016522761806845664,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.16934556365013123,
|
|
"signal/batch_coverage_15/group_std_mean": 0.21964602470397948,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.016934556514024736,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.016934556514024736,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.17093099951744078,
|
|
"signal/batch_coverage_20/group_std_mean": 0.2225628077983856,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.017093100026249884,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.017093100026249884,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.17006333768367768,
|
|
"signal/batch_coverage_25/group_std_mean": 0.22242553234100343,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01700633317232132,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01700633317232132,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1607960045337677,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2090065598487854,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.016079600527882577,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.016079600527882577,
|
|
"signal/brier_reward/centered_abs_mean": 0.1368030786514282,
|
|
"signal/brier_reward/group_std_mean": 0.1751306027173996,
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013680307939648629,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013680307939648629,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05568226352334023,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07030727565288544,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.003125,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005568226426839828,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005568226426839828,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015239943517372013,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022282961290329695,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9049930051551202e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9049930051551202e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.015793051198124887,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.022042370960116388,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015793051803484558,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015793051803484558,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3185978889465332,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4003509938716888,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.059375,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031859788671135904,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031859788671135904,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25818907579779654,
|
|
"calibration/batch_distribution_entropy": 0.6443005343910162,
|
|
"calibration/buffer_distribution_entropy": 0.7526658672700117,
|
|
"calibration/confidence_entropy": 0.21598953797233658,
|
|
"calibration/coverage@0%": 0.023091976516634048,
|
|
"calibration/coverage@1%": 0.023091976516634048,
|
|
"calibration/coverage@10%": 0.14755381604696674,
|
|
"calibration/coverage@15%": 0.3197651663405088,
|
|
"calibration/coverage@20%": 0.4682699363992172,
|
|
"calibration/coverage@25%": 0.5401923312133072,
|
|
"calibration/coverage@30%": 0.6648116438356164,
|
|
"calibration/coverage@5%": 0.1365949119373777,
|
|
"calibration/ece": 0.159088284084341,
|
|
"calibration/mean_confidence": 0.6250040115878907,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00068359375,
|
|
"completions/max_length": 575.8,
|
|
"completions/max_terminated_length": 575.8,
|
|
"completions/mean_length": 193.503125,
|
|
"completions/mean_terminated_length": 193.63548889160157,
|
|
"completions/min_length": 18.2,
|
|
"completions/min_terminated_length": 91.8,
|
|
"epoch": 0.64,
|
|
"grad_norm": 0.0019849766977131367,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0007,
|
|
"num_tokens": 677638645.0,
|
|
"reward": 1.3034381151199341,
|
|
"reward_std": 0.14825326800346375,
|
|
"rewards/accuracy_reward": 0.5529296875,
|
|
"rewards/batch_coverage_0": 0.5572910845279694,
|
|
"rewards/batch_coverage_1": 0.5572910845279694,
|
|
"rewards/batch_coverage_10": 0.6068256497383118,
|
|
"rewards/batch_coverage_15": 0.6149711847305298,
|
|
"rewards/batch_coverage_20": 0.6288576126098633,
|
|
"rewards/batch_coverage_25": 0.6334639191627502,
|
|
"rewards/batch_coverage_5": 0.5896883249282837,
|
|
"rewards/brier_reward": 0.8173499464988708,
|
|
"rewards/confidence_uniqueness_reward": 0.8553975939750671,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.002552823629230261,
|
|
"rewards/frontier_ece_reward": 0.016957807727158068,
|
|
"rewards/frontier_entropy_batch_reward": -0.604624617099762,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07900390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10646929293870926,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039501953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.039501953125,
|
|
"signal/advantage_abs_mean": 0.11008306294679641,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11008306294679641,
|
|
"signal/advantage_pre_scale_std": 0.18596419095993041,
|
|
"signal/advantage_std": 0.18596419095993041,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.12932612597942353,
|
|
"signal/batch_coverage_0/group_std_mean": 0.17381446361541747,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012932613119482994,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.012932613119482994,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.12932612597942353,
|
|
"signal/batch_coverage_1/group_std_mean": 0.17381446361541747,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012932613119482994,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.012932613119482994,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.1382578879594803,
|
|
"signal/batch_coverage_10/group_std_mean": 0.18876586258411407,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013825790025293827,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.013825790025293827,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.1413852721452713,
|
|
"signal/batch_coverage_15/group_std_mean": 0.19356752038002015,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01413852721452713,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.01413852721452713,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.14702281653881072,
|
|
"signal/batch_coverage_20/group_std_mean": 0.2009480744600296,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01470228172838688,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.01470228172838688,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.14943260848522186,
|
|
"signal/batch_coverage_25/group_std_mean": 0.20391323864459993,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01494326014071703,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01494326014071703,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.13566993772983552,
|
|
"signal/batch_coverage_5/group_std_mean": 0.1848902851343155,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013566994294524192,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.013566994294524192,
|
|
"signal/brier_reward/centered_abs_mean": 0.1233207032084465,
|
|
"signal/brier_reward/group_std_mean": 0.16159166097640992,
|
|
"signal/brier_reward/group_zero_std_frac": 0.00625,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012332070805132388,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012332070805132388,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07103251814842224,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09032833874225617,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.00625,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007103251945227385,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007103251945227385,
|
|
"signal/format_reward/centered_abs_mean": 0.001251220703125,
|
|
"signal/format_reward/group_std_mean": 0.0027073150966316463,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006256103515625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006256103515625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002036643889732659,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002958608232438564,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.545804818510078e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.545804818510078e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014836767874658107,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02108708433806896,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001483676815405488,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001483676815405488,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30819383859634397,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38484176993370056,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.109375,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0308193851262331,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0308193851262331,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"eval_calibration/aurc": 0.45066771849639575,
|
|
"eval_calibration/batch_distribution_entropy": 0.6308715104303002,
|
|
"eval_calibration/buffer_distribution_entropy": 0.7501602692465891,
|
|
"eval_calibration/confidence_entropy": 0.23284911409052508,
|
|
"eval_calibration/coverage@0%": 0.0,
|
|
"eval_calibration/coverage@1%": 0.0,
|
|
"eval_calibration/coverage@10%": 0.0,
|
|
"eval_calibration/coverage@15%": 0.0,
|
|
"eval_calibration/coverage@20%": 0.125,
|
|
"eval_calibration/coverage@25%": 0.28125,
|
|
"eval_calibration/coverage@30%": 0.3125,
|
|
"eval_calibration/coverage@5%": 0.0,
|
|
"eval_calibration/ece": 0.2192780273711064,
|
|
"eval_calibration/mean_confidence": 0.5297764255255403,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 365.5,
|
|
"eval_completions/max_terminated_length": 365.5,
|
|
"eval_completions/mean_length": 198.55636978149414,
|
|
"eval_completions/mean_terminated_length": 198.55636978149414,
|
|
"eval_completions/min_length": 105.5,
|
|
"eval_completions/min_terminated_length": 105.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 677638645.0,
|
|
"eval_reward": 0.9594081491231918,
|
|
"eval_reward_std": 0.2795492261648178,
|
|
"eval_rewards/accuracy_reward": 0.400390625,
|
|
"eval_rewards/batch_coverage_0": 0.2906555011868477,
|
|
"eval_rewards/batch_coverage_1": 0.2906555011868477,
|
|
"eval_rewards/batch_coverage_10": 0.2898253872990608,
|
|
"eval_rewards/batch_coverage_15": 0.28895024210214615,
|
|
"eval_rewards/batch_coverage_20": 0.269227497279644,
|
|
"eval_rewards/batch_coverage_25": 0.25441256538033485,
|
|
"eval_rewards/batch_coverage_5": 0.2906555011868477,
|
|
"eval_rewards/brier_reward": 0.7779050469398499,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.82861328125,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.003932581515982747,
|
|
"eval_rewards/frontier_ece_reward": 0.011719705304130912,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 20.7013,
|
|
"eval_samples_per_second": 24.153,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4666748046875,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.490452878177166,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23333740234375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23333740234375,
|
|
"eval_signal/advantage_abs_mean": 0.2340683601796627,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.2340683601796627,
|
|
"eval_signal/advantage_pre_scale_std": 0.2773290351033211,
|
|
"eval_signal/advantage_std": 0.2773290351033211,
|
|
"eval_signal/batch_coverage_0/centered_abs_mean": 0.42698580026626587,
|
|
"eval_signal/batch_coverage_0/group_std_mean": 0.4923050254583359,
|
|
"eval_signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04269858077168465,
|
|
"eval_signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.04269858077168465,
|
|
"eval_signal/batch_coverage_1/centered_abs_mean": 0.42698580026626587,
|
|
"eval_signal/batch_coverage_1/group_std_mean": 0.4923050254583359,
|
|
"eval_signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04269858077168465,
|
|
"eval_signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.04269858077168465,
|
|
"eval_signal/batch_coverage_10/centered_abs_mean": 0.42510540038347244,
|
|
"eval_signal/batch_coverage_10/group_std_mean": 0.49010204523801804,
|
|
"eval_signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.042510541155934334,
|
|
"eval_signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.042510541155934334,
|
|
"eval_signal/batch_coverage_15/centered_abs_mean": 0.4231926202774048,
|
|
"eval_signal/batch_coverage_15/group_std_mean": 0.4875435531139374,
|
|
"eval_signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04231926240026951,
|
|
"eval_signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.04231926240026951,
|
|
"eval_signal/batch_coverage_20/centered_abs_mean": 0.3872489780187607,
|
|
"eval_signal/batch_coverage_20/group_std_mean": 0.4467836171388626,
|
|
"eval_signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03872489836066961,
|
|
"eval_signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.03872489836066961,
|
|
"eval_signal/batch_coverage_25/centered_abs_mean": 0.36741621047258377,
|
|
"eval_signal/batch_coverage_25/group_std_mean": 0.4262534826993942,
|
|
"eval_signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.036741622257977724,
|
|
"eval_signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.036741622257977724,
|
|
"eval_signal/batch_coverage_5/centered_abs_mean": 0.42698580026626587,
|
|
"eval_signal/batch_coverage_5/group_std_mean": 0.4923050254583359,
|
|
"eval_signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04269858077168465,
|
|
"eval_signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.04269858077168465,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2969773858785629,
|
|
"eval_signal/brier_reward/group_std_mean": 0.3588665947318077,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029697738122195005,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.029697738122195005,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0925445556640625,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.10798590630292892,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00925445614848286,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00925445614848286,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004777178633958101,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008755038492381573,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.9714732742577326e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.9714732742577326e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.026464423164725304,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.03283689636737108,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0026464423281140625,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0026464423281140625,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.193,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calibration/aurc": 0.436145941498868,
|
|
"calibration/batch_distribution_entropy": 0.7614577097539426,
|
|
"calibration/buffer_distribution_entropy": 0.7494956520521233,
|
|
"calibration/confidence_entropy": 0.2691995055392518,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.105859375,
|
|
"calibration/coverage@25%": 0.16328125,
|
|
"calibration/coverage@30%": 0.283203125,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.20043935589572012,
|
|
"calibration/mean_confidence": 0.5348283324646669,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 510.6,
|
|
"completions/max_terminated_length": 510.6,
|
|
"completions/mean_length": 195.7046875,
|
|
"completions/mean_terminated_length": 195.7046875,
|
|
"completions/min_length": 96.0,
|
|
"completions/min_terminated_length": 96.0,
|
|
"epoch": 0.656,
|
|
"grad_norm": 0.0017912992043420672,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 694499205.0,
|
|
"reward": 1.2205629348754883,
|
|
"reward_std": 0.15728234946727754,
|
|
"rewards/accuracy_reward": 0.46318359375,
|
|
"rewards/batch_coverage_0": 0.5111707746982574,
|
|
"rewards/batch_coverage_1": 0.5111707746982574,
|
|
"rewards/batch_coverage_10": 0.5461826920509338,
|
|
"rewards/batch_coverage_15": 0.5565583229064941,
|
|
"rewards/batch_coverage_20": 0.5595410823822021,
|
|
"rewards/batch_coverage_25": 0.5592182993888855,
|
|
"rewards/batch_coverage_5": 0.5302507996559143,
|
|
"rewards/brier_reward": 0.7698781847953796,
|
|
"rewards/confidence_uniqueness_reward": 0.8930137634277344,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.003443529363721609,
|
|
"rewards/frontier_ece_reward": 0.011358432192355394,
|
|
"rewards/frontier_entropy_batch_reward": -0.558201253414154,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.085833740234375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1159292846918106,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0429168701171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0429168701171875,
|
|
"signal/advantage_abs_mean": 0.11985027641057969,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11985027641057969,
|
|
"signal/advantage_pre_scale_std": 0.19015521705150604,
|
|
"signal/advantage_std": 0.19015521705150604,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.13516520261764525,
|
|
"signal/batch_coverage_0/group_std_mean": 0.18049061596393584,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013516520708799362,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.013516520708799362,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.13516520261764525,
|
|
"signal/batch_coverage_1/group_std_mean": 0.18049061596393584,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013516520708799362,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.013516520708799362,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.1454785704612732,
|
|
"signal/batch_coverage_10/group_std_mean": 0.19473540186882018,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.014547857455909252,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.014547857455909252,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.15078432261943817,
|
|
"signal/batch_coverage_15/group_std_mean": 0.20216354429721833,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.015078432485461235,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.015078432485461235,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.15291909277439117,
|
|
"signal/batch_coverage_20/group_std_mean": 0.20492708683013916,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.015291909500956536,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.015291909500956536,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.15271320343017578,
|
|
"signal/batch_coverage_25/group_std_mean": 0.20409930348396302,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.015271320939064026,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.015271320939064026,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1402449667453766,
|
|
"signal/batch_coverage_5/group_std_mean": 0.18734357357025147,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0140244971960783,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0140244971960783,
|
|
"signal/brier_reward/centered_abs_mean": 0.1363556757569313,
|
|
"signal/brier_reward/group_std_mean": 0.17585920095443724,
|
|
"signal/brier_reward/group_zero_std_frac": 0.009375,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01363556794822216,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01363556794822216,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04828674793243408,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.060622844845056534,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.009375,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004828674811869859,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004828674811869859,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002189541654661298,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0030809998977929355,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.736927053774707e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.736927053774707e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.016461933963000774,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.023596494644880294,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016461933497339486,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016461933497339486,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3272219479084015,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4061711668968201,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032722195237874986,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032722195237874986,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27816703918639335,
|
|
"calibration/batch_distribution_entropy": 0.6903424523444357,
|
|
"calibration/buffer_distribution_entropy": 0.7495368119155803,
|
|
"calibration/confidence_entropy": 0.2347079734115539,
|
|
"calibration/coverage@0%": 0.05390625,
|
|
"calibration/coverage@1%": 0.076953125,
|
|
"calibration/coverage@10%": 0.1948452818627451,
|
|
"calibration/coverage@15%": 0.29333180147058824,
|
|
"calibration/coverage@20%": 0.36292126225490196,
|
|
"calibration/coverage@25%": 0.411812193627451,
|
|
"calibration/coverage@30%": 0.47671109068627454,
|
|
"calibration/coverage@5%": 0.14984987745098038,
|
|
"calibration/ece": 0.1697572193501926,
|
|
"calibration/mean_confidence": 0.5357356714814479,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 553.0,
|
|
"completions/max_terminated_length": 553.0,
|
|
"completions/mean_length": 197.2208984375,
|
|
"completions/mean_terminated_length": 197.27909240722656,
|
|
"completions/min_length": 55.6,
|
|
"completions/min_terminated_length": 91.0,
|
|
"epoch": 0.672,
|
|
"grad_norm": 0.001681040390394628,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 711432187.0,
|
|
"reward": 1.264276671409607,
|
|
"reward_std": 0.14050707817077637,
|
|
"rewards/accuracy_reward": 0.496875,
|
|
"rewards/batch_coverage_0": 0.5329373478889465,
|
|
"rewards/batch_coverage_1": 0.5329373478889465,
|
|
"rewards/batch_coverage_10": 0.5927770495414734,
|
|
"rewards/batch_coverage_15": 0.6036754965782165,
|
|
"rewards/batch_coverage_20": 0.6099525451660156,
|
|
"rewards/batch_coverage_25": 0.6132030963897706,
|
|
"rewards/batch_coverage_5": 0.5753331422805786,
|
|
"rewards/brier_reward": 0.8148468971252442,
|
|
"rewards/confidence_uniqueness_reward": 0.872640061378479,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.002430691896006465,
|
|
"rewards/frontier_ece_reward": 0.014318699017167091,
|
|
"rewards/frontier_entropy_batch_reward": -0.6024617195129395,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08050537109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11048125326633454,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.040252685546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.040252685546875,
|
|
"signal/advantage_abs_mean": 0.10363083332777023,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10363083332777023,
|
|
"signal/advantage_pre_scale_std": 0.17971519827842714,
|
|
"signal/advantage_std": 0.17971519827842714,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.12535898834466935,
|
|
"signal/batch_coverage_0/group_std_mean": 0.16480962038040162,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012535898946225643,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.012535898946225643,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.12535898834466935,
|
|
"signal/batch_coverage_1/group_std_mean": 0.16480962038040162,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012535898946225643,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.012535898946225643,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.13579677045345306,
|
|
"signal/batch_coverage_10/group_std_mean": 0.1822267711162567,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01357967797666788,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.01357967797666788,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.13899961709976197,
|
|
"signal/batch_coverage_15/group_std_mean": 0.1863336056470871,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013899962231516839,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.013899962231516839,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.1399351730942726,
|
|
"signal/batch_coverage_20/group_std_mean": 0.18823565542697906,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01399351805448532,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.01399351805448532,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.14212610721588134,
|
|
"signal/batch_coverage_25/group_std_mean": 0.19051645696163177,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.014212611131370068,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.014212611131370068,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.13201508820056915,
|
|
"signal/batch_coverage_5/group_std_mean": 0.17619396150112152,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013201508484780789,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.013201508484780789,
|
|
"signal/brier_reward/centered_abs_mean": 0.11381059437990189,
|
|
"signal/brier_reward/group_std_mean": 0.14819636046886445,
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011381059512495994,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011381059512495994,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.054254206269979476,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06855006217956543,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.003125,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005425420589745045,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005425420589745045,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013720685848966241,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0019520026398822665,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7150857092929073e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7150857092929073e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.013757929392158984,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.019465847685933114,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013757929671555757,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013757929671555757,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3010860621929169,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37441075444221494,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.096875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030108605325222016,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030108605325222016,
|
|
"step": 210
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3334160486367128,
|
|
"calibration/batch_distribution_entropy": 0.7490269037549024,
|
|
"calibration/buffer_distribution_entropy": 0.7462465371181635,
|
|
"calibration/confidence_entropy": 0.269153724018258,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.139453125,
|
|
"calibration/coverage@15%": 0.2046875,
|
|
"calibration/coverage@20%": 0.35078125,
|
|
"calibration/coverage@25%": 0.413671875,
|
|
"calibration/coverage@30%": 0.467578125,
|
|
"calibration/coverage@5%": 0.01328125,
|
|
"calibration/ece": 0.1800994213842444,
|
|
"calibration/mean_confidence": 0.4994494024658354,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 597.0,
|
|
"completions/max_terminated_length": 597.0,
|
|
"completions/mean_length": 203.0525390625,
|
|
"completions/mean_terminated_length": 203.0525390625,
|
|
"completions/min_length": 102.4,
|
|
"completions/min_terminated_length": 102.4,
|
|
"epoch": 0.688,
|
|
"grad_norm": 0.0015836784150451422,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0014,
|
|
"num_tokens": 728465365.0,
|
|
"reward": 1.2559720993041992,
|
|
"reward_std": 0.1384851634502411,
|
|
"rewards/accuracy_reward": 0.50068359375,
|
|
"rewards/batch_coverage_0": 0.531680804491043,
|
|
"rewards/batch_coverage_1": 0.531680804491043,
|
|
"rewards/batch_coverage_10": 0.573654568195343,
|
|
"rewards/batch_coverage_15": 0.5778225183486938,
|
|
"rewards/batch_coverage_20": 0.5828616857528687,
|
|
"rewards/batch_coverage_25": 0.586078429222107,
|
|
"rewards/batch_coverage_5": 0.558163869380951,
|
|
"rewards/brier_reward": 0.8070386648178101,
|
|
"rewards/confidence_uniqueness_reward": 0.88953857421875,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0022389247780665754,
|
|
"rewards/frontier_ece_reward": 0.014052477292716503,
|
|
"rewards/frontier_entropy_batch_reward": -0.5959893703460694,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.090765380859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11465331614017486,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.684375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0453826904296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0453826904296875,
|
|
"signal/advantage_abs_mean": 0.10710875988006592,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10710875988006592,
|
|
"signal/advantage_pre_scale_std": 0.1759258270263672,
|
|
"signal/advantage_std": 0.1759258270263672,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.12940916717052459,
|
|
"signal/batch_coverage_0/group_std_mean": 0.1708761364221573,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012940916605293751,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.012940916605293751,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.12940916717052459,
|
|
"signal/batch_coverage_1/group_std_mean": 0.1708761364221573,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012940916605293751,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.012940916605293751,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.1361958459019661,
|
|
"signal/batch_coverage_10/group_std_mean": 0.1809363543987274,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.0136195857077837,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0136195857077837,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.13794144690036775,
|
|
"signal/batch_coverage_15/group_std_mean": 0.18336707949638367,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013794144801795482,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.013794144801795482,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.1340498149394989,
|
|
"signal/batch_coverage_20/group_std_mean": 0.18017881512641906,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013404982350766658,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.013404982350766658,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.13671108484268188,
|
|
"signal/batch_coverage_25/group_std_mean": 0.18310289680957795,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013671108894050122,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.013671108894050122,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.13302011638879777,
|
|
"signal/batch_coverage_5/group_std_mean": 0.17659396231174468,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013302012160420418,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.013302012160420418,
|
|
"signal/brier_reward/centered_abs_mean": 0.12155969887971878,
|
|
"signal/brier_reward/group_std_mean": 0.15423001050949098,
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012155969999730587,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012155969999730587,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.045909452438354495,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.056747060269117355,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.003125,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004590945364907384,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004590945364907384,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011943122604861855,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0016959201195277274,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.4928904056432657e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.4928904056432657e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01512746512889862,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.021570420265197753,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015127464896067977,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015127464896067977,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29471340775489807,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37298219203948973,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02947134114801884,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02947134114801884,
|
|
"step": 215
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2355489229172564,
|
|
"calibration/batch_distribution_entropy": 0.640870734793577,
|
|
"calibration/buffer_distribution_entropy": 0.745662741922789,
|
|
"calibration/confidence_entropy": 0.2047382660438588,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0390625,
|
|
"calibration/coverage@10%": 0.18203125,
|
|
"calibration/coverage@15%": 0.29609375,
|
|
"calibration/coverage@20%": 0.441015625,
|
|
"calibration/coverage@25%": 0.612109375,
|
|
"calibration/coverage@30%": 0.72578125,
|
|
"calibration/coverage@5%": 0.06875,
|
|
"calibration/ece": 0.14953982855671993,
|
|
"calibration/mean_confidence": 0.5587629108731182,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 576.4,
|
|
"completions/max_terminated_length": 576.4,
|
|
"completions/mean_length": 201.7212890625,
|
|
"completions/mean_terminated_length": 201.74125061035156,
|
|
"completions/min_length": 80.0,
|
|
"completions/min_terminated_length": 99.6,
|
|
"epoch": 0.704,
|
|
"grad_norm": 0.0029429446440190077,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 745397135.0,
|
|
"reward": 1.297256064414978,
|
|
"reward_std": 0.14224921464920043,
|
|
"rewards/accuracy_reward": 0.5150390625,
|
|
"rewards/batch_coverage_0": 0.58274827003479,
|
|
"rewards/batch_coverage_1": 0.58274827003479,
|
|
"rewards/batch_coverage_10": 0.6211008548736572,
|
|
"rewards/batch_coverage_15": 0.6245938181877136,
|
|
"rewards/batch_coverage_20": 0.6274907231330872,
|
|
"rewards/batch_coverage_25": 0.6309248447418213,
|
|
"rewards/batch_coverage_5": 0.6062975645065307,
|
|
"rewards/brier_reward": 0.8248634696006775,
|
|
"rewards/confidence_uniqueness_reward": 0.8828810691833496,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0022668347228318453,
|
|
"rewards/frontier_ece_reward": 0.015720732137560846,
|
|
"rewards/frontier_entropy_batch_reward": -0.6007444858551025,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07073974609375,
|
|
"signal/accuracy_reward/group_std_mean": 0.09534884691238403,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.715625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.035369873046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.035369873046875,
|
|
"signal/advantage_abs_mean": 0.10416051000356674,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10416051000356674,
|
|
"signal/advantage_pre_scale_std": 0.17625333666801452,
|
|
"signal/advantage_std": 0.17625333666801452,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.13438412249088288,
|
|
"signal/batch_coverage_0/group_std_mean": 0.18114750385284423,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013438411988317966,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.013438411988317966,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.13438412249088288,
|
|
"signal/batch_coverage_1/group_std_mean": 0.18114750385284423,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013438411988317966,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.013438411988317966,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.1422608584165573,
|
|
"signal/batch_coverage_10/group_std_mean": 0.1935015231370926,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.014226085878908635,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.014226085878908635,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.14202981293201447,
|
|
"signal/batch_coverage_15/group_std_mean": 0.19373472929000854,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.014202981814742088,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.014202981814742088,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.14324666261672975,
|
|
"signal/batch_coverage_20/group_std_mean": 0.19504115581512452,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.014324666187167167,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.014324666187167167,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.14737085103988648,
|
|
"signal/batch_coverage_25/group_std_mean": 0.19997539818286897,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.014737085625529289,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.014737085625529289,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1389584869146347,
|
|
"signal/batch_coverage_5/group_std_mean": 0.18836452662944794,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013895849138498307,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.013895849138498307,
|
|
"signal/brier_reward/centered_abs_mean": 0.11510290652513504,
|
|
"signal/brier_reward/group_std_mean": 0.15334579348564148,
|
|
"signal/brier_reward/group_zero_std_frac": 0.009375,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011510290764272213,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011510290764272213,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05111872330307961,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06499804481863976,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.009375,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005111872497946024,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005111872497946024,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013043643673881888,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0019197963876649738,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6304554083035328e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6304554083035328e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014480138197541237,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.021022016927599908,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014480138663202525,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014480138663202525,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3132603347301483,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38880074620246885,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031326035782694814,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031326035782694814,
|
|
"step": 220
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24117854419539175,
|
|
"calibration/batch_distribution_entropy": 0.6917096147909891,
|
|
"calibration/buffer_distribution_entropy": 0.7415210871493679,
|
|
"calibration/confidence_entropy": 0.21799001668587992,
|
|
"calibration/coverage@0%": 0.038671875,
|
|
"calibration/coverage@1%": 0.054296875,
|
|
"calibration/coverage@10%": 0.2015625,
|
|
"calibration/coverage@15%": 0.2765625,
|
|
"calibration/coverage@20%": 0.51484375,
|
|
"calibration/coverage@25%": 0.621875,
|
|
"calibration/coverage@30%": 0.670703125,
|
|
"calibration/coverage@5%": 0.142578125,
|
|
"calibration/ece": 0.1317607727717102,
|
|
"calibration/mean_confidence": 0.5068727895744845,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 512.4,
|
|
"completions/max_terminated_length": 512.4,
|
|
"completions/mean_length": 202.0845703125,
|
|
"completions/mean_terminated_length": 202.0845703125,
|
|
"completions/min_length": 95.4,
|
|
"completions/min_terminated_length": 95.4,
|
|
"epoch": 0.72,
|
|
"grad_norm": 0.0018140418687835336,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0012,
|
|
"num_tokens": 762476337.0,
|
|
"reward": 1.2914434671401978,
|
|
"reward_std": 0.13651327788829803,
|
|
"rewards/accuracy_reward": 0.533984375,
|
|
"rewards/batch_coverage_0": 0.5698285698890686,
|
|
"rewards/batch_coverage_1": 0.5698285698890686,
|
|
"rewards/batch_coverage_10": 0.6016792893409729,
|
|
"rewards/batch_coverage_15": 0.6043854236602784,
|
|
"rewards/batch_coverage_20": 0.608850610256195,
|
|
"rewards/batch_coverage_25": 0.6127121925354004,
|
|
"rewards/batch_coverage_5": 0.5894207715988159,
|
|
"rewards/brier_reward": 0.8136415481567383,
|
|
"rewards/confidence_uniqueness_reward": 0.8610710144042969,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.00200494471937418,
|
|
"rewards/frontier_ece_reward": 0.013372968323528767,
|
|
"rewards/frontier_entropy_batch_reward": -0.600027346611023,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08544921875,
|
|
"signal/accuracy_reward/group_std_mean": 0.10862117111682892,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.70625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042724609375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.042724609375,
|
|
"signal/advantage_abs_mean": 0.10617989897727967,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10617989897727967,
|
|
"signal/advantage_pre_scale_std": 0.17531263828277588,
|
|
"signal/advantage_std": 0.17531263828277588,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.13560049831867219,
|
|
"signal/batch_coverage_0/group_std_mean": 0.1758313685655594,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.015625,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013560050167143345,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.013560050167143345,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.13560049831867219,
|
|
"signal/batch_coverage_1/group_std_mean": 0.1758313685655594,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.015625,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013560050167143345,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.013560050167143345,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.13977060616016387,
|
|
"signal/batch_coverage_10/group_std_mean": 0.1825422078371048,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.015625,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013977061398327351,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.013977061398327351,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.14058802425861358,
|
|
"signal/batch_coverage_15/group_std_mean": 0.18333434462547302,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.015625,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.014058802835643291,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.014058802835643291,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.13900991082191466,
|
|
"signal/batch_coverage_20/group_std_mean": 0.1814419984817505,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.015625,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013900990970432759,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.013900990970432759,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.1429667666554451,
|
|
"signal/batch_coverage_25/group_std_mean": 0.186186283826828,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.015625,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01429667677730322,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01429667677730322,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.13655753284692765,
|
|
"signal/batch_coverage_5/group_std_mean": 0.17737014293670655,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.015625,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013655753619968892,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.013655753619968892,
|
|
"signal/brier_reward/centered_abs_mean": 0.12080991268157959,
|
|
"signal/brier_reward/group_std_mean": 0.15258357524871827,
|
|
"signal/brier_reward/group_zero_std_frac": 0.015625,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012080991454422475,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012080991454422475,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05788230895996094,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07352784276008606,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.015625,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005788230989128351,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005788230989128351,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001230324897915125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0017709915526211261,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5379060823761392e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5379060823761392e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01474948674440384,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.020724740251898764,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001474948669783771,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001474948669783771,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29659512639045715,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36827229857444765,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.11875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029659513384103775,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029659513384103775,
|
|
"step": 225
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2523398918321875,
|
|
"calibration/batch_distribution_entropy": 0.7061118258311535,
|
|
"calibration/buffer_distribution_entropy": 0.7388043538380987,
|
|
"calibration/confidence_entropy": 0.22098310735812107,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.050390625,
|
|
"calibration/coverage@10%": 0.0875,
|
|
"calibration/coverage@15%": 0.3453125,
|
|
"calibration/coverage@20%": 0.467578125,
|
|
"calibration/coverage@25%": 0.596875,
|
|
"calibration/coverage@30%": 0.673828125,
|
|
"calibration/coverage@5%": 0.073046875,
|
|
"calibration/ece": 0.17879724224531168,
|
|
"calibration/mean_confidence": 0.6009326217256324,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 518.2,
|
|
"completions/max_terminated_length": 518.2,
|
|
"completions/mean_length": 197.7541015625,
|
|
"completions/mean_terminated_length": 197.8127685546875,
|
|
"completions/min_length": 54.8,
|
|
"completions/min_terminated_length": 93.4,
|
|
"epoch": 0.736,
|
|
"grad_norm": 0.0016909514088183641,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 779440923.0,
|
|
"reward": 1.2925546407699584,
|
|
"reward_std": 0.1415196657180786,
|
|
"rewards/accuracy_reward": 0.54072265625,
|
|
"rewards/batch_coverage_0": 0.5608146786689758,
|
|
"rewards/batch_coverage_1": 0.5608146786689758,
|
|
"rewards/batch_coverage_10": 0.594396460056305,
|
|
"rewards/batch_coverage_15": 0.5996437549591065,
|
|
"rewards/batch_coverage_20": 0.6081606268882751,
|
|
"rewards/batch_coverage_25": 0.6098075866699219,
|
|
"rewards/batch_coverage_5": 0.5855140924453736,
|
|
"rewards/brier_reward": 0.8093111157417298,
|
|
"rewards/confidence_uniqueness_reward": 0.8509040236473083,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0025167773477733136,
|
|
"rewards/frontier_ece_reward": 0.013303298316895962,
|
|
"rewards/frontier_entropy_batch_reward": -0.5689576864242554,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.080865478515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10603559166193008,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.7,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0404327392578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0404327392578125,
|
|
"signal/advantage_abs_mean": 0.1061436727643013,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1061436727643013,
|
|
"signal/advantage_pre_scale_std": 0.1785441070795059,
|
|
"signal/advantage_std": 0.1785441070795059,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.12894578129053116,
|
|
"signal/batch_coverage_0/group_std_mean": 0.16901729106903077,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012894578650593758,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.012894578650593758,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.12894578129053116,
|
|
"signal/batch_coverage_1/group_std_mean": 0.16901729106903077,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012894578650593758,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.012894578650593758,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.13518355786800385,
|
|
"signal/batch_coverage_10/group_std_mean": 0.1799877792596817,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013518355973064899,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.013518355973064899,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.13510996997356414,
|
|
"signal/batch_coverage_15/group_std_mean": 0.18045617043972015,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013510997965931892,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.013510997965931892,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.1411931872367859,
|
|
"signal/batch_coverage_20/group_std_mean": 0.18840061128139496,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.014119319431483746,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.014119319431483746,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.14360681772232056,
|
|
"signal/batch_coverage_25/group_std_mean": 0.19102306962013244,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.014360682666301727,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.014360682666301727,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.13322059959173202,
|
|
"signal/batch_coverage_5/group_std_mean": 0.177263942360878,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01332206018269062,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.01332206018269062,
|
|
"signal/brier_reward/centered_abs_mean": 0.11520067602396011,
|
|
"signal/brier_reward/group_std_mean": 0.1510821118950844,
|
|
"signal/brier_reward/group_zero_std_frac": 0.00625,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011520067788660526,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011520067788660526,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07168898284435272,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08953625559806824,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.00625,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007168898358941078,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007168898358941078,
|
|
"signal/format_reward/centered_abs_mean": 0.000555419921875,
|
|
"signal/format_reward/group_std_mean": 0.0013209730386734009,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001666014944203198,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002366511942818761,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.082518840325065e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.082518840325065e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014191032014787197,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.020698029920458792,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001419103262014687,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001419103262014687,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30279971957206725,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38480743765830994,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.059375,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03027997352182865,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03027997352182865,
|
|
"step": 230
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24314173580957563,
|
|
"calibration/batch_distribution_entropy": 0.7123511896880926,
|
|
"calibration/buffer_distribution_entropy": 0.7371898135623439,
|
|
"calibration/confidence_entropy": 0.22319584805799755,
|
|
"calibration/coverage@0%": 0.009784735812133072,
|
|
"calibration/coverage@1%": 0.009784735812133072,
|
|
"calibration/coverage@10%": 0.25522718933463795,
|
|
"calibration/coverage@15%": 0.35569349315068494,
|
|
"calibration/coverage@20%": 0.4861928204500979,
|
|
"calibration/coverage@25%": 0.5659001956947163,
|
|
"calibration/coverage@30%": 0.6616400134540117,
|
|
"calibration/coverage@5%": 0.1485475782778865,
|
|
"calibration/ece": 0.13654095046647682,
|
|
"calibration/mean_confidence": 0.5268796281130378,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 445.2,
|
|
"completions/max_terminated_length": 445.2,
|
|
"completions/mean_length": 192.54716796875,
|
|
"completions/mean_terminated_length": 192.56629943847656,
|
|
"completions/min_length": 74.8,
|
|
"completions/min_terminated_length": 93.0,
|
|
"epoch": 0.752,
|
|
"grad_norm": 0.0022267785388976336,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0,
|
|
"num_tokens": 796639806.0,
|
|
"reward": 1.291816520690918,
|
|
"reward_std": 0.13338485807180406,
|
|
"rewards/accuracy_reward": 0.5451171875,
|
|
"rewards/batch_coverage_0": 0.5534084558486938,
|
|
"rewards/batch_coverage_1": 0.5534084558486938,
|
|
"rewards/batch_coverage_10": 0.59267737865448,
|
|
"rewards/batch_coverage_15": 0.5985195159912109,
|
|
"rewards/batch_coverage_20": 0.6050418257713318,
|
|
"rewards/batch_coverage_25": 0.6092750668525696,
|
|
"rewards/batch_coverage_5": 0.5784332752227783,
|
|
"rewards/brier_reward": 0.7937209248542786,
|
|
"rewards/confidence_uniqueness_reward": 0.8617787718772888,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002670770836994052,
|
|
"rewards/frontier_ece_reward": 0.012176255136728287,
|
|
"rewards/frontier_entropy_batch_reward": -0.5650394916534424,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07152099609375,
|
|
"signal/accuracy_reward/group_std_mean": 0.0952995702624321,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.725,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.035760498046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.035760498046875,
|
|
"signal/advantage_abs_mean": 0.10051141977310181,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10051141977310181,
|
|
"signal/advantage_pre_scale_std": 0.17218650579452516,
|
|
"signal/advantage_std": 0.17218650579452516,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.12577387243509291,
|
|
"signal/batch_coverage_0/group_std_mean": 0.1656516134738922,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.015625,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0125773873180151,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0125773873180151,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.12577387243509291,
|
|
"signal/batch_coverage_1/group_std_mean": 0.1656516134738922,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.015625,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0125773873180151,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0125773873180151,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.13408605754375458,
|
|
"signal/batch_coverage_10/group_std_mean": 0.17692401707172395,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.015625,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013408605940639973,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.013408605940639973,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.13487772792577743,
|
|
"signal/batch_coverage_15/group_std_mean": 0.17835747599601745,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.015625,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013487772829830647,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.013487772829830647,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.13585045635700227,
|
|
"signal/batch_coverage_20/group_std_mean": 0.18036530017852784,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.015625,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01358504593372345,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.01358504593372345,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.13990364819765091,
|
|
"signal/batch_coverage_25/group_std_mean": 0.18538238406181334,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.015625,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013990364596247673,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.013990364596247673,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1291911095380783,
|
|
"signal/batch_coverage_5/group_std_mean": 0.16966727375984192,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.015625,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012919111363589763,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.012919111363589763,
|
|
"signal/brier_reward/centered_abs_mean": 0.10877085328102112,
|
|
"signal/brier_reward/group_std_mean": 0.14029166996479034,
|
|
"signal/brier_reward/group_zero_std_frac": 0.015625,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01087708566337824,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01087708566337824,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0628635786473751,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07769058793783187,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.015625,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006286358088254928,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006286358088254928,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015472988365218044,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00224983487278223,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.934123574756086e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.934123574756086e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01419222578406334,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.020413671061396597,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014192226575687529,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014192226575687529,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3012643814086914,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3788640916347504,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.08125,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030126439034938814,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030126439034938814,
|
|
"step": 235
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21815909386420346,
|
|
"calibration/batch_distribution_entropy": 0.769684210167051,
|
|
"calibration/buffer_distribution_entropy": 0.7371303368412581,
|
|
"calibration/confidence_entropy": 0.2846331447906999,
|
|
"calibration/coverage@0%": 0.053515625,
|
|
"calibration/coverage@1%": 0.182421875,
|
|
"calibration/coverage@10%": 0.357421875,
|
|
"calibration/coverage@15%": 0.451953125,
|
|
"calibration/coverage@20%": 0.51015625,
|
|
"calibration/coverage@25%": 0.562109375,
|
|
"calibration/coverage@30%": 0.643359375,
|
|
"calibration/coverage@5%": 0.248828125,
|
|
"calibration/ece": 0.15731911620859837,
|
|
"calibration/mean_confidence": 0.5699674765575071,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 524.8,
|
|
"completions/max_terminated_length": 524.8,
|
|
"completions/mean_length": 203.03603515625,
|
|
"completions/mean_terminated_length": 203.03603515625,
|
|
"completions/min_length": 92.2,
|
|
"completions/min_terminated_length": 92.2,
|
|
"epoch": 0.768,
|
|
"grad_norm": 0.001630605780519545,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 813651599.0,
|
|
"reward": 1.2780816316604615,
|
|
"reward_std": 0.14629258513450621,
|
|
"rewards/accuracy_reward": 0.5091796875,
|
|
"rewards/batch_coverage_0": 0.5428201794624329,
|
|
"rewards/batch_coverage_1": 0.5428201794624329,
|
|
"rewards/batch_coverage_10": 0.5980752348899842,
|
|
"rewards/batch_coverage_15": 0.6076067090034485,
|
|
"rewards/batch_coverage_20": 0.6146794557571411,
|
|
"rewards/batch_coverage_25": 0.6174161672592163,
|
|
"rewards/batch_coverage_5": 0.5823442339897156,
|
|
"rewards/brier_reward": 0.8140376687049866,
|
|
"rewards/confidence_uniqueness_reward": 0.8872955322265625,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0023141817888244986,
|
|
"rewards/frontier_ece_reward": 0.014350495114922524,
|
|
"rewards/frontier_entropy_batch_reward": -0.5862390518188476,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07115478515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.09603603929281235,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.715625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.035577392578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.035577392578125,
|
|
"signal/advantage_abs_mean": 0.10806111246347427,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10806111246347427,
|
|
"signal/advantage_pre_scale_std": 0.17815430760383605,
|
|
"signal/advantage_std": 0.17815430760383605,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.12889273911714555,
|
|
"signal/batch_coverage_0/group_std_mean": 0.17496535778045655,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012889273837208748,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.012889273837208748,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.12889273911714555,
|
|
"signal/batch_coverage_1/group_std_mean": 0.17496535778045655,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012889273837208748,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.012889273837208748,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.13530712127685546,
|
|
"signal/batch_coverage_10/group_std_mean": 0.18599896728992463,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013530712574720383,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.013530712574720383,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.13818536698818207,
|
|
"signal/batch_coverage_15/group_std_mean": 0.19022200405597686,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013818537257611752,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.013818537257611752,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.14182150661945342,
|
|
"signal/batch_coverage_20/group_std_mean": 0.1955121785402298,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01418215073645115,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.01418215073645115,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.14496648013591767,
|
|
"signal/batch_coverage_25/group_std_mean": 0.1991838574409485,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01449664793908596,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01449664793908596,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.13354593515396118,
|
|
"signal/batch_coverage_5/group_std_mean": 0.18284645676612854,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013354593142867088,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.013354593142867088,
|
|
"signal/brier_reward/centered_abs_mean": 0.11099631637334824,
|
|
"signal/brier_reward/group_std_mean": 0.1493169844150543,
|
|
"signal/brier_reward/group_zero_std_frac": 0.00625,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011099631898105145,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011099631898105145,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05087149143218994,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06436762884259224,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.00625,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005087149236351252,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005087149236351252,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014162956038489937,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0021210991777479648,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.770369544829009e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.770369544829009e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.015161270461976527,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.021915557235479353,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00151612707413733,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00151612707413733,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30829928517341615,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3859909653663635,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.084375,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030829928815364838,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030829928815364838,
|
|
"step": 240
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31010626739798186,
|
|
"calibration/batch_distribution_entropy": 0.6831615592574666,
|
|
"calibration/buffer_distribution_entropy": 0.7349332924643199,
|
|
"calibration/confidence_entropy": 0.21844742802956504,
|
|
"calibration/coverage@0%": 0.054296875,
|
|
"calibration/coverage@1%": 0.094140625,
|
|
"calibration/coverage@10%": 0.146484375,
|
|
"calibration/coverage@15%": 0.255859375,
|
|
"calibration/coverage@20%": 0.2851807118395303,
|
|
"calibration/coverage@25%": 0.4258301736790607,
|
|
"calibration/coverage@30%": 0.5031739236790607,
|
|
"calibration/coverage@5%": 0.121875,
|
|
"calibration/ece": 0.19194136252588118,
|
|
"calibration/mean_confidence": 0.5863911610582415,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 681.2,
|
|
"completions/max_terminated_length": 681.2,
|
|
"completions/mean_length": 207.463671875,
|
|
"completions/mean_terminated_length": 207.5250457763672,
|
|
"completions/min_length": 59.6,
|
|
"completions/min_terminated_length": 98.0,
|
|
"epoch": 0.784,
|
|
"grad_norm": 0.0015109021915122867,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 830950395.0,
|
|
"reward": 1.268712568283081,
|
|
"reward_std": 0.13139432221651076,
|
|
"rewards/accuracy_reward": 0.52626953125,
|
|
"rewards/batch_coverage_0": 0.5273642539978027,
|
|
"rewards/batch_coverage_1": 0.5273642539978027,
|
|
"rewards/batch_coverage_10": 0.5780391335487366,
|
|
"rewards/batch_coverage_15": 0.5797824621200561,
|
|
"rewards/batch_coverage_20": 0.5853094935417176,
|
|
"rewards/batch_coverage_25": 0.5844082474708557,
|
|
"rewards/batch_coverage_5": 0.5631725430488587,
|
|
"rewards/brier_reward": 0.7936733603477478,
|
|
"rewards/confidence_uniqueness_reward": 0.8792999029159546,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0025360194500535726,
|
|
"rewards/frontier_ece_reward": 0.011786134168505668,
|
|
"rewards/frontier_entropy_batch_reward": -0.5726401686668396,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.086236572265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.11323271244764328,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.678125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0431182861328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0431182861328125,
|
|
"signal/advantage_abs_mean": 0.09704598337411881,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09704598337411881,
|
|
"signal/advantage_pre_scale_std": 0.16713069379329681,
|
|
"signal/advantage_std": 0.16713069379329681,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.11174871325492859,
|
|
"signal/batch_coverage_0/group_std_mean": 0.1489221602678299,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0125,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01117487158626318,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.01117487158626318,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.11174871325492859,
|
|
"signal/batch_coverage_1/group_std_mean": 0.1489221602678299,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0125,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01117487158626318,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.01117487158626318,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.12213961333036423,
|
|
"signal/batch_coverage_10/group_std_mean": 0.16586555540561676,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0125,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.0122139610350132,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.0122139610350132,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.12145914733409882,
|
|
"signal/batch_coverage_15/group_std_mean": 0.16523004770278932,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0125,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.012145914323627948,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.012145914323627948,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.12562210708856583,
|
|
"signal/batch_coverage_20/group_std_mean": 0.1706618309020996,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0125,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012562210485339165,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.012562210485339165,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.12414962351322174,
|
|
"signal/batch_coverage_25/group_std_mean": 0.16929054260253906,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0125,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01241496242582798,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01241496242582798,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.11780698150396347,
|
|
"signal/batch_coverage_5/group_std_mean": 0.1591268002986908,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0125,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.011780698224902153,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.011780698224902153,
|
|
"signal/brier_reward/centered_abs_mean": 0.10932108014822006,
|
|
"signal/brier_reward/group_std_mean": 0.1419556260108948,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010932107828557492,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010932107828557492,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05147377476096153,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06528320237994194,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0125,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00514737768098712,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00514737768098712,
|
|
"signal/format_reward/centered_abs_mean": 0.000555419921875,
|
|
"signal/format_reward/group_std_mean": 0.0013209730386734009,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014083811081945896,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0020011267391964794,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.760476479830686e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.760476479830686e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014706828817725181,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.021075991913676262,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014706829097121954,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014706829097121954,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2917087584733963,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3725455284118652,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02917087487876415,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02917087487876415,
|
|
"step": 245
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19282041934687397,
|
|
"calibration/batch_distribution_entropy": 0.6920062865146607,
|
|
"calibration/buffer_distribution_entropy": 0.7301821197316796,
|
|
"calibration/confidence_entropy": 0.22344387400004026,
|
|
"calibration/coverage@0%": 0.046966731898238745,
|
|
"calibration/coverage@1%": 0.05675146771037182,
|
|
"calibration/coverage@10%": 0.42365612769080235,
|
|
"calibration/coverage@15%": 0.5261764615949118,
|
|
"calibration/coverage@20%": 0.5903368089530333,
|
|
"calibration/coverage@25%": 0.663110781555773,
|
|
"calibration/coverage@30%": 0.7335280088062623,
|
|
"calibration/coverage@5%": 0.27499235567514674,
|
|
"calibration/ece": 0.12089007125129762,
|
|
"calibration/mean_confidence": 0.5416411376509921,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 598.2,
|
|
"completions/max_terminated_length": 598.2,
|
|
"completions/mean_length": 206.33037109375,
|
|
"completions/mean_terminated_length": 206.45191040039063,
|
|
"completions/min_length": 42.2,
|
|
"completions/min_terminated_length": 104.8,
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.0014231241075322032,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 848073778.0,
|
|
"reward": 1.3338397979736327,
|
|
"reward_std": 0.13077863156795502,
|
|
"rewards/accuracy_reward": 0.56591796875,
|
|
"rewards/batch_coverage_0": 0.5898068666458129,
|
|
"rewards/batch_coverage_1": 0.5898068666458129,
|
|
"rewards/batch_coverage_10": 0.6379106283187866,
|
|
"rewards/batch_coverage_15": 0.6420271754264831,
|
|
"rewards/batch_coverage_20": 0.6438995718955993,
|
|
"rewards/batch_coverage_25": 0.6488496541976929,
|
|
"rewards/batch_coverage_5": 0.6152103543281555,
|
|
"rewards/brier_reward": 0.8356095075607299,
|
|
"rewards/confidence_uniqueness_reward": 0.8643570899963379,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.0020930708618834616,
|
|
"rewards/frontier_ece_reward": 0.016211163997650147,
|
|
"rewards/frontier_entropy_batch_reward": -0.5712017893791199,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.072723388671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.09923464208841323,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.70625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0363616943359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0363616943359375,
|
|
"signal/advantage_abs_mean": 0.09668900221586227,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09668900221586227,
|
|
"signal/advantage_pre_scale_std": 0.16912133395671844,
|
|
"signal/advantage_std": 0.16912133395671844,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.12063909322023392,
|
|
"signal/batch_coverage_0/group_std_mean": 0.15900228917598724,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012063909694552422,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.012063909694552422,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.12063909322023392,
|
|
"signal/batch_coverage_1/group_std_mean": 0.15900228917598724,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012063909694552422,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.012063909694552422,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.13073422014713287,
|
|
"signal/batch_coverage_10/group_std_mean": 0.17543690502643586,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013073421642184257,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.013073421642184257,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.13373751044273377,
|
|
"signal/batch_coverage_15/group_std_mean": 0.17892775535583497,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013373749889433384,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.013373749889433384,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.1333797663450241,
|
|
"signal/batch_coverage_20/group_std_mean": 0.17859428822994233,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01333797611296177,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.01333797611296177,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.13365401029586793,
|
|
"signal/batch_coverage_25/group_std_mean": 0.1794481545686722,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013365400955080986,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.013365400955080986,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.12666998505592347,
|
|
"signal/batch_coverage_5/group_std_mean": 0.16839803159236907,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012666998617351054,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.012666998617351054,
|
|
"signal/brier_reward/centered_abs_mean": 0.10218747854232788,
|
|
"signal/brier_reward/group_std_mean": 0.13509040623903273,
|
|
"signal/brier_reward/group_zero_std_frac": 0.009375,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010218747891485691,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010218747891485691,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05471925586462021,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07163973078131676,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.009375,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005471925716847181,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005471925716847181,
|
|
"signal/format_reward/centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/group_std_mean": 0.0038669900968670845,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011783302179537714,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0017042707419022917,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.4729128452017903e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.4729128452017903e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014615490287542342,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.021591220796108247,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014615490566939116,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014615490566939116,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2841952681541443,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35755252838134766,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.115625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028419527411460876,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028419527411460876,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"eval_calibration/aurc": 0.4775511860244453,
|
|
"eval_calibration/batch_distribution_entropy": 0.6805845499846008,
|
|
"eval_calibration/buffer_distribution_entropy": 0.7287791314575365,
|
|
"eval_calibration/confidence_entropy": 0.2213890362749855,
|
|
"eval_calibration/coverage@0%": 0.0,
|
|
"eval_calibration/coverage@1%": 0.0,
|
|
"eval_calibration/coverage@10%": 0.0859375,
|
|
"eval_calibration/coverage@15%": 0.125,
|
|
"eval_calibration/coverage@20%": 0.1328125,
|
|
"eval_calibration/coverage@25%": 0.2578125,
|
|
"eval_calibration/coverage@30%": 0.2890625,
|
|
"eval_calibration/coverage@5%": 0.0,
|
|
"eval_calibration/ece": 0.2288685063967824,
|
|
"eval_calibration/mean_confidence": 0.5053683476164722,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 554.75,
|
|
"eval_completions/max_terminated_length": 554.75,
|
|
"eval_completions/mean_length": 219.2979507446289,
|
|
"eval_completions/mean_terminated_length": 219.2979507446289,
|
|
"eval_completions/min_length": 116.25,
|
|
"eval_completions/min_terminated_length": 116.25,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 848073778.0,
|
|
"eval_reward": 0.974822074174881,
|
|
"eval_reward_std": 0.27409257739782333,
|
|
"eval_rewards/accuracy_reward": 0.416015625,
|
|
"eval_rewards/batch_coverage_0": 0.30540529638528824,
|
|
"eval_rewards/batch_coverage_1": 0.30540529638528824,
|
|
"eval_rewards/batch_coverage_10": 0.2937457114458084,
|
|
"eval_rewards/batch_coverage_15": 0.29220280796289444,
|
|
"eval_rewards/batch_coverage_20": 0.2766593173146248,
|
|
"eval_rewards/batch_coverage_25": 0.2715606912970543,
|
|
"eval_rewards/batch_coverage_5": 0.30540529638528824,
|
|
"eval_rewards/brier_reward": 0.7973639369010925,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8115234375,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0032213623635470867,
|
|
"eval_rewards/frontier_ece_reward": 0.00927359308116138,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 24.8989,
|
|
"eval_samples_per_second": 20.081,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4693603515625,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4918139800429344,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23468017578125,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23468017578125,
|
|
"eval_signal/advantage_abs_mean": 0.22791888937354088,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.22791888937354088,
|
|
"eval_signal/advantage_pre_scale_std": 0.27271074801683426,
|
|
"eval_signal/advantage_std": 0.27271074801683426,
|
|
"eval_signal/batch_coverage_0/centered_abs_mean": 0.4482870399951935,
|
|
"eval_signal/batch_coverage_0/group_std_mean": 0.5090989097952843,
|
|
"eval_signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04482870548963547,
|
|
"eval_signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.04482870548963547,
|
|
"eval_signal/batch_coverage_1/centered_abs_mean": 0.4482870399951935,
|
|
"eval_signal/batch_coverage_1/group_std_mean": 0.5090989097952843,
|
|
"eval_signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04482870548963547,
|
|
"eval_signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.04482870548963547,
|
|
"eval_signal/batch_coverage_10/centered_abs_mean": 0.42958785593509674,
|
|
"eval_signal/batch_coverage_10/group_std_mean": 0.4874923899769783,
|
|
"eval_signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04295878764241934,
|
|
"eval_signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.04295878764241934,
|
|
"eval_signal/batch_coverage_15/centered_abs_mean": 0.4125537723302841,
|
|
"eval_signal/batch_coverage_15/group_std_mean": 0.46426092088222504,
|
|
"eval_signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04125537909567356,
|
|
"eval_signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.04125537909567356,
|
|
"eval_signal/batch_coverage_20/centered_abs_mean": 0.38762741535902023,
|
|
"eval_signal/batch_coverage_20/group_std_mean": 0.4408438876271248,
|
|
"eval_signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03876274265348911,
|
|
"eval_signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.03876274265348911,
|
|
"eval_signal/batch_coverage_25/centered_abs_mean": 0.3840094059705734,
|
|
"eval_signal/batch_coverage_25/group_std_mean": 0.4386216476559639,
|
|
"eval_signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03840094292536378,
|
|
"eval_signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.03840094292536378,
|
|
"eval_signal/batch_coverage_5/centered_abs_mean": 0.4482870399951935,
|
|
"eval_signal/batch_coverage_5/group_std_mean": 0.5090989097952843,
|
|
"eval_signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04482870548963547,
|
|
"eval_signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.04482870548963547,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.27652557939291,
|
|
"eval_signal/brier_reward/group_std_mean": 0.3379634767770767,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027652557473629713,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.027652557473629713,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.11602783203125,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.13252386078238487,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011602783459238708,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011602783459238708,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003930443141143769,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.007698433822952211,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.913054181088228e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.913054181088228e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.025017567910254,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.03442148957401514,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025017568841576576,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025017568841576576,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.161,
|
|
"step": 250
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2267960123496567,
|
|
"calibration/batch_distribution_entropy": 0.6801923238285612,
|
|
"calibration/buffer_distribution_entropy": 0.7272822166052157,
|
|
"calibration/confidence_entropy": 0.20849673218961717,
|
|
"calibration/coverage@0%": 0.0296875,
|
|
"calibration/coverage@1%": 0.0296875,
|
|
"calibration/coverage@10%": 0.2459026418786693,
|
|
"calibration/coverage@15%": 0.34717312866927597,
|
|
"calibration/coverage@20%": 0.5267497859589041,
|
|
"calibration/coverage@25%": 0.6791913833170253,
|
|
"calibration/coverage@30%": 0.7862921966731898,
|
|
"calibration/coverage@5%": 0.069140625,
|
|
"calibration/ece": 0.15203698595724888,
|
|
"calibration/mean_confidence": 0.5770622252568032,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 576.2,
|
|
"completions/max_terminated_length": 576.2,
|
|
"completions/mean_length": 212.0181640625,
|
|
"completions/mean_terminated_length": 212.0784149169922,
|
|
"completions/min_length": 60.4,
|
|
"completions/min_terminated_length": 102.2,
|
|
"epoch": 0.816,
|
|
"grad_norm": 0.001950260135345161,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 865344012.0,
|
|
"reward": 1.2896551132202148,
|
|
"reward_std": 0.1455328106880188,
|
|
"rewards/accuracy_reward": 0.548828125,
|
|
"rewards/batch_coverage_0": 0.5595304846763611,
|
|
"rewards/batch_coverage_1": 0.5595304846763611,
|
|
"rewards/batch_coverage_10": 0.5896369695663453,
|
|
"rewards/batch_coverage_15": 0.596402394771576,
|
|
"rewards/batch_coverage_20": 0.5992687940597534,
|
|
"rewards/batch_coverage_25": 0.60002201795578,
|
|
"rewards/batch_coverage_5": 0.5778818249702453,
|
|
"rewards/brier_reward": 0.7952520966529846,
|
|
"rewards/confidence_uniqueness_reward": 0.8402364134788514,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0025426708860322835,
|
|
"rewards/frontier_ece_reward": 0.0110880795866251,
|
|
"rewards/frontier_entropy_batch_reward": -0.5746568083763123,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0798828125,
|
|
"signal/accuracy_reward/group_std_mean": 0.10603945106267929,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.69375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03994140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03994140625,
|
|
"signal/advantage_abs_mean": 0.11010724902153016,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11010724902153016,
|
|
"signal/advantage_pre_scale_std": 0.18228633999824523,
|
|
"signal/advantage_std": 0.18228633999824523,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.13016874790191652,
|
|
"signal/batch_coverage_0/group_std_mean": 0.17292714715003968,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01301687490195036,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.01301687490195036,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.13016874790191652,
|
|
"signal/batch_coverage_1/group_std_mean": 0.17292714715003968,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01301687490195036,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.01301687490195036,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.1363198786973953,
|
|
"signal/batch_coverage_10/group_std_mean": 0.1838128536939621,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013631988130509854,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.013631988130509854,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.13772199749946595,
|
|
"signal/batch_coverage_15/group_std_mean": 0.18577627539634706,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013772199861705303,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.013772199861705303,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.13926664888858795,
|
|
"signal/batch_coverage_20/group_std_mean": 0.18798602223396302,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013926665298640728,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.013926665298640728,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.13877122104167938,
|
|
"signal/batch_coverage_25/group_std_mean": 0.18786073625087737,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013877122662961483,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.013877122662961483,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.13375141620635986,
|
|
"signal/batch_coverage_5/group_std_mean": 0.17892039716243743,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01337514165788889,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.01337514165788889,
|
|
"signal/brier_reward/centered_abs_mean": 0.12692040652036668,
|
|
"signal/brier_reward/group_std_mean": 0.16331054866313935,
|
|
"signal/brier_reward/group_zero_std_frac": 0.009375,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012692040763795376,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012692040763795376,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07178411781787872,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08999869525432587,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.009375,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00717841163277626,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00717841163277626,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015543691348284482,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002139846235513687,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.942961334862048e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.942961334862048e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.015287755616009235,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.022031076624989508,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015287755988538266,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015287755988538266,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29790628552436826,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3725601613521576,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.13125,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02979062981903553,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02979062981903553,
|
|
"step": 255
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3024189916118689,
|
|
"calibration/batch_distribution_entropy": 0.6861718769182762,
|
|
"calibration/buffer_distribution_entropy": 0.7247281231943082,
|
|
"calibration/confidence_entropy": 0.2141393949059299,
|
|
"calibration/coverage@0%": 0.042578125,
|
|
"calibration/coverage@1%": 0.04609375,
|
|
"calibration/coverage@10%": 0.201171875,
|
|
"calibration/coverage@15%": 0.2602418664383562,
|
|
"calibration/coverage@20%": 0.28878042441291585,
|
|
"calibration/coverage@25%": 0.30363487646771037,
|
|
"calibration/coverage@30%": 0.5067927470645792,
|
|
"calibration/coverage@5%": 0.18671875,
|
|
"calibration/ece": 0.14507543516520943,
|
|
"calibration/mean_confidence": 0.5084626958457709,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 563.8,
|
|
"completions/max_terminated_length": 563.8,
|
|
"completions/mean_length": 214.33603515625,
|
|
"completions/mean_terminated_length": 214.35703125,
|
|
"completions/min_length": 81.0,
|
|
"completions/min_terminated_length": 102.2,
|
|
"epoch": 0.832,
|
|
"grad_norm": 0.0017113488866016269,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 882547165.0,
|
|
"reward": 1.3040168046951295,
|
|
"reward_std": 0.12452108860015869,
|
|
"rewards/accuracy_reward": 0.54287109375,
|
|
"rewards/batch_coverage_0": 0.5823396205902099,
|
|
"rewards/batch_coverage_1": 0.5823396205902099,
|
|
"rewards/batch_coverage_10": 0.6146724224090576,
|
|
"rewards/batch_coverage_15": 0.6178886413574218,
|
|
"rewards/batch_coverage_20": 0.6242651224136353,
|
|
"rewards/batch_coverage_25": 0.6258123993873597,
|
|
"rewards/batch_coverage_5": 0.6028478622436524,
|
|
"rewards/brier_reward": 0.8178551077842713,
|
|
"rewards/confidence_uniqueness_reward": 0.8379308581352234,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002056396356783807,
|
|
"rewards/frontier_ece_reward": 0.012563115172088146,
|
|
"rewards/frontier_entropy_batch_reward": -0.5914687752723694,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.076885986328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1008117601275444,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.715625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0384429931640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0384429931640625,
|
|
"signal/advantage_abs_mean": 0.09464630335569382,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09464630335569382,
|
|
"signal/advantage_pre_scale_std": 0.16419532597064973,
|
|
"signal/advantage_std": 0.16419532597064973,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.11629042327404022,
|
|
"signal/batch_coverage_0/group_std_mean": 0.15208458602428437,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.01875,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.011629042960703374,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.011629042960703374,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.11629042327404022,
|
|
"signal/batch_coverage_1/group_std_mean": 0.15208458602428437,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.01875,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.011629042960703374,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.011629042960703374,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.12401841282844543,
|
|
"signal/batch_coverage_10/group_std_mean": 0.1630850315093994,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.01875,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.012401841208338737,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.012401841208338737,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.12051795870065689,
|
|
"signal/batch_coverage_15/group_std_mean": 0.15964794158935547,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.01875,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.012051796354353429,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.012051796354353429,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.12344660609960556,
|
|
"signal/batch_coverage_20/group_std_mean": 0.16371358335018157,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.01875,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012344660982489587,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.012344660982489587,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.12385808080434799,
|
|
"signal/batch_coverage_25/group_std_mean": 0.16440981924533843,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.01875,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012385808303952217,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.012385808303952217,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.12124198824167251,
|
|
"signal/batch_coverage_5/group_std_mean": 0.15847708582878112,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.01875,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012124198861420155,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.012124198861420155,
|
|
"signal/brier_reward/centered_abs_mean": 0.10539275407791138,
|
|
"signal/brier_reward/group_std_mean": 0.13495143502950668,
|
|
"signal/brier_reward/group_zero_std_frac": 0.01875,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010539275407791138,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010539275407791138,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06620322167873383,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08328029215335846,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.01875,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0066203223541378975,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0066203223541378975,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012305549927987159,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00173785334918648,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5381937919300982e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5381937919300982e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014193679951131345,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02084806077182293,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.034375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014193680603057147,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014193680603057147,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28034199476242067,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3522096395492554,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.14375,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02803419977426529,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02803419977426529,
|
|
"step": 260
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29979283230660575,
|
|
"calibration/batch_distribution_entropy": 0.6765818092872171,
|
|
"calibration/buffer_distribution_entropy": 0.7230590012667675,
|
|
"calibration/confidence_entropy": 0.22264761767104865,
|
|
"calibration/coverage@0%": 0.05626605308219178,
|
|
"calibration/coverage@1%": 0.06915667808219178,
|
|
"calibration/coverage@10%": 0.2630664444716243,
|
|
"calibration/coverage@15%": 0.4146755748532289,
|
|
"calibration/coverage@20%": 0.4494603106653621,
|
|
"calibration/coverage@25%": 0.4838528926125244,
|
|
"calibration/coverage@30%": 0.5705968688845401,
|
|
"calibration/coverage@5%": 0.13172089041095889,
|
|
"calibration/ece": 0.17770718042535594,
|
|
"calibration/mean_confidence": 0.6034304608609038,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 800.0,
|
|
"completions/max_terminated_length": 800.0,
|
|
"completions/mean_length": 212.58818359375,
|
|
"completions/mean_terminated_length": 212.6305938720703,
|
|
"completions/min_length": 60.6,
|
|
"completions/min_terminated_length": 98.8,
|
|
"epoch": 0.848,
|
|
"grad_norm": 0.0015765472780913115,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 899738436.0,
|
|
"reward": 1.2874624252319335,
|
|
"reward_std": 0.13262848556041718,
|
|
"rewards/accuracy_reward": 0.51318359375,
|
|
"rewards/batch_coverage_0": 0.5744598150253296,
|
|
"rewards/batch_coverage_1": 0.5744598150253296,
|
|
"rewards/batch_coverage_10": 0.603915560245514,
|
|
"rewards/batch_coverage_15": 0.6057839632034302,
|
|
"rewards/batch_coverage_20": 0.6098087549209594,
|
|
"rewards/batch_coverage_25": 0.6135491847991943,
|
|
"rewards/batch_coverage_5": 0.595538854598999,
|
|
"rewards/brier_reward": 0.806088674068451,
|
|
"rewards/confidence_uniqueness_reward": 0.8638126015663147,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.002301845629699528,
|
|
"rewards/frontier_ece_reward": 0.01232540961354971,
|
|
"rewards/frontier_entropy_batch_reward": -0.5492840528488159,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.061627197265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.08691354244947433,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.728125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0308135986328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0308135986328125,
|
|
"signal/advantage_abs_mean": 0.10021563172340393,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10021563172340393,
|
|
"signal/advantage_pre_scale_std": 0.1665943294763565,
|
|
"signal/advantage_std": 0.1665943294763565,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.12552589774131775,
|
|
"signal/batch_coverage_0/group_std_mean": 0.164810910820961,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012552590481936932,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.012552590481936932,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.12552589774131775,
|
|
"signal/batch_coverage_1/group_std_mean": 0.164810910820961,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012552590481936932,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.012552590481936932,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.1308951810002327,
|
|
"signal/batch_coverage_10/group_std_mean": 0.17249469459056854,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013089518807828427,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.013089518807828427,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.13017829209566117,
|
|
"signal/batch_coverage_15/group_std_mean": 0.1718754142522812,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01301782913506031,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.01301782913506031,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.13185496330261232,
|
|
"signal/batch_coverage_20/group_std_mean": 0.17450777292251587,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013185496255755425,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.013185496255755425,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.13360297530889512,
|
|
"signal/batch_coverage_25/group_std_mean": 0.17730580568313598,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013360296934843063,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.013360296934843063,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.12875944674015044,
|
|
"signal/batch_coverage_5/group_std_mean": 0.16948716938495637,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.00625,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012875944934785366,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.012875944934785366,
|
|
"signal/brier_reward/centered_abs_mean": 0.11305683553218841,
|
|
"signal/brier_reward/group_std_mean": 0.14634765088558196,
|
|
"signal/brier_reward/group_zero_std_frac": 0.00625,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01130568366497755,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01130568366497755,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05614908337593079,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07217531129717827,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.00625,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005614908412098885,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005614908412098885,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013377515482716263,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.001973598566837609,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6721895008231513e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6721895008231513e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01556033082306385,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.021966927498579026,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015560331754386424,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015560331754386424,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2900454640388489,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3650382697582245,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.09375,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029004548117518424,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029004548117518424,
|
|
"step": 265
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2759403497776601,
|
|
"calibration/batch_distribution_entropy": 0.6759332309201006,
|
|
"calibration/buffer_distribution_entropy": 0.7218298133224164,
|
|
"calibration/confidence_entropy": 0.22823251290571217,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.1109375,
|
|
"calibration/coverage@15%": 0.2078125,
|
|
"calibration/coverage@20%": 0.36640625,
|
|
"calibration/coverage@25%": 0.43671875,
|
|
"calibration/coverage@30%": 0.518359375,
|
|
"calibration/coverage@5%": 0.09609375,
|
|
"calibration/ece": 0.21517310261243403,
|
|
"calibration/mean_confidence": 0.6667003336869002,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001171875,
|
|
"completions/max_length": 676.0,
|
|
"completions/max_terminated_length": 676.0,
|
|
"completions/mean_length": 215.37490234375,
|
|
"completions/mean_terminated_length": 215.62887268066407,
|
|
"completions/min_length": 45.4,
|
|
"completions/min_terminated_length": 109.8,
|
|
"epoch": 0.864,
|
|
"grad_norm": 0.0014087754534557462,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.001,
|
|
"num_tokens": 916930691.0,
|
|
"reward": 1.2985878467559815,
|
|
"reward_std": 0.13927391171455383,
|
|
"rewards/accuracy_reward": 0.5646484375,
|
|
"rewards/batch_coverage_0": 0.5505944132804871,
|
|
"rewards/batch_coverage_1": 0.5505944132804871,
|
|
"rewards/batch_coverage_10": 0.5925943732261658,
|
|
"rewards/batch_coverage_15": 0.5966384470462799,
|
|
"rewards/batch_coverage_20": 0.600320303440094,
|
|
"rewards/batch_coverage_25": 0.6009154736995697,
|
|
"rewards/batch_coverage_5": 0.5749504089355468,
|
|
"rewards/brier_reward": 0.8001022934913635,
|
|
"rewards/confidence_uniqueness_reward": 0.8556058764457702,
|
|
"rewards/format_reward": 0.998828125,
|
|
"rewards/frontier_aurc_reward": -0.0022088738158345224,
|
|
"rewards/frontier_ece_reward": 0.012191972695291042,
|
|
"rewards/frontier_entropy_batch_reward": -0.5657361030578614,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0737060546875,
|
|
"signal/accuracy_reward/group_std_mean": 0.09959790781140328,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.703125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03685302734375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03685302734375,
|
|
"signal/advantage_abs_mean": 0.10322214066982269,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10322214066982269,
|
|
"signal/advantage_pre_scale_std": 0.17601919770240784,
|
|
"signal/advantage_std": 0.17601919770240784,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.12593218684196472,
|
|
"signal/batch_coverage_0/group_std_mean": 0.16748648285865783,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.01875,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012593218125402927,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.012593218125402927,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.12593218684196472,
|
|
"signal/batch_coverage_1/group_std_mean": 0.16748648285865783,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.01875,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012593218125402927,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.012593218125402927,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.13293709754943847,
|
|
"signal/batch_coverage_10/group_std_mean": 0.17932658195495604,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.01875,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01329371016472578,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.01329371016472578,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.13411223590373994,
|
|
"signal/batch_coverage_15/group_std_mean": 0.1812635064125061,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.01875,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013411223329603672,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.013411223329603672,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.1337016761302948,
|
|
"signal/batch_coverage_20/group_std_mean": 0.18126676678657533,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.01875,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013370167277753354,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.013370167277753354,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.13443489372730255,
|
|
"signal/batch_coverage_25/group_std_mean": 0.18231047987937926,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.01875,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013443489000201226,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.013443489000201226,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1292974293231964,
|
|
"signal/batch_coverage_5/group_std_mean": 0.17306924760341644,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.01875,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012929742969572545,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.012929742969572545,
|
|
"signal/brier_reward/centered_abs_mean": 0.1134807363152504,
|
|
"signal/brier_reward/group_std_mean": 0.14848156571388244,
|
|
"signal/brier_reward/group_zero_std_frac": 0.01875,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011348073929548263,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011348073929548263,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05932857394218445,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07796282023191452,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.01875,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005932857375591993,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005932857375591993,
|
|
"signal/format_reward/centered_abs_mean": 0.00208740234375,
|
|
"signal/format_reward/group_std_mean": 0.004553806083276868,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001043701171875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.001043701171875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012687626876868308,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.001827608421444893,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5859533777984326e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5859533777984326e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.016462472081184388,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.024404867365956307,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016462472500279545,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016462472500279545,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.285188752412796,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3623782157897949,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.11875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028518874570727347,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028518874570727347,
|
|
"step": 270
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3703553403819811,
|
|
"calibration/batch_distribution_entropy": 0.7133684850336566,
|
|
"calibration/buffer_distribution_entropy": 0.7202831683503106,
|
|
"calibration/confidence_entropy": 0.22813746743108307,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.031702544031311154,
|
|
"calibration/coverage@20%": 0.031702544031311154,
|
|
"calibration/coverage@25%": 0.22447330601761256,
|
|
"calibration/coverage@30%": 0.3096616621819961,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.19746284505661899,
|
|
"calibration/mean_confidence": 0.5590293023641005,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 641.2,
|
|
"completions/max_terminated_length": 641.2,
|
|
"completions/mean_length": 215.3197265625,
|
|
"completions/mean_terminated_length": 215.38296813964843,
|
|
"completions/min_length": 62.2,
|
|
"completions/min_terminated_length": 105.6,
|
|
"epoch": 0.88,
|
|
"grad_norm": 0.0035550326574593782,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0013,
|
|
"num_tokens": 934282637.0,
|
|
"reward": 1.2669667959213258,
|
|
"reward_std": 0.13528041839599608,
|
|
"rewards/accuracy_reward": 0.49970703125,
|
|
"rewards/batch_coverage_0": 0.5631029605865479,
|
|
"rewards/batch_coverage_1": 0.5631029605865479,
|
|
"rewards/batch_coverage_10": 0.5879178166389465,
|
|
"rewards/batch_coverage_15": 0.5897319793701172,
|
|
"rewards/batch_coverage_20": 0.5955337166786194,
|
|
"rewards/batch_coverage_25": 0.6003549456596374,
|
|
"rewards/batch_coverage_5": 0.5774053573608399,
|
|
"rewards/brier_reward": 0.8021393895149231,
|
|
"rewards/confidence_uniqueness_reward": 0.8372437953948975,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0027949722949415444,
|
|
"rewards/frontier_ece_reward": 0.012080707773566245,
|
|
"rewards/frontier_entropy_batch_reward": -0.5556662797927856,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.076898193359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.0979268804192543,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.734375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0384490966796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0384490966796875,
|
|
"signal/advantage_abs_mean": 0.10434950143098831,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10434950143098831,
|
|
"signal/advantage_pre_scale_std": 0.171790811419487,
|
|
"signal/advantage_std": 0.171790811419487,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.12332225143909455,
|
|
"signal/batch_coverage_0/group_std_mean": 0.1597755014896393,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.025,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01233222484588623,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.01233222484588623,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.12332225143909455,
|
|
"signal/batch_coverage_1/group_std_mean": 0.1597755014896393,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.025,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01233222484588623,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.01233222484588623,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.12346419841051101,
|
|
"signal/batch_coverage_10/group_std_mean": 0.16303113102912903,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.025,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01234642006456852,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.01234642006456852,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.12326491475105286,
|
|
"signal/batch_coverage_15/group_std_mean": 0.16280312538146974,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.025,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.012326491996645927,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.012326491996645927,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.12646077275276185,
|
|
"signal/batch_coverage_20/group_std_mean": 0.1670425981283188,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.025,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012646077573299408,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.012646077573299408,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.13245510756969453,
|
|
"signal/batch_coverage_25/group_std_mean": 0.17370954751968384,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.025,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013245511427521705,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.013245511427521705,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.12559498399496077,
|
|
"signal/batch_coverage_5/group_std_mean": 0.16357622146606446,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.025,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0125594986602664,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0125594986602664,
|
|
"signal/brier_reward/centered_abs_mean": 0.11624975800514221,
|
|
"signal/brier_reward/group_std_mean": 0.14766744375228882,
|
|
"signal/brier_reward/group_zero_std_frac": 0.025,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011624976061284542,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011624976061284542,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06369166523218155,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08169415593147278,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.025,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0063691666349768635,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0063691666349768635,
|
|
"signal/format_reward/centered_abs_mean": 0.000555419921875,
|
|
"signal/format_reward/group_std_mean": 0.0013209730386734009,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001598638528957963,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002216655877418816,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.998298175749369e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.998298175749369e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014671192318201066,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.019565947353839874,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.05,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014671192737296223,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014671192737296223,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2858834505081177,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36205363273620605,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.10625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028588346764445306,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028588346764445306,
|
|
"step": 275
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3487537494892914,
|
|
"calibration/batch_distribution_entropy": 0.7252037298917273,
|
|
"calibration/buffer_distribution_entropy": 0.7198900225087584,
|
|
"calibration/confidence_entropy": 0.2345359152758087,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.05518590998043053,
|
|
"calibration/coverage@15%": 0.16850461717221135,
|
|
"calibration/coverage@20%": 0.24861561276908023,
|
|
"calibration/coverage@25%": 0.38031127690802347,
|
|
"calibration/coverage@30%": 0.47884968199608613,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.20244766605736125,
|
|
"calibration/mean_confidence": 0.575858111159339,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 777.0,
|
|
"completions/max_terminated_length": 777.0,
|
|
"completions/mean_length": 219.4595703125,
|
|
"completions/mean_terminated_length": 219.52434997558595,
|
|
"completions/min_length": 40.4,
|
|
"completions/min_terminated_length": 101.8,
|
|
"epoch": 0.896,
|
|
"grad_norm": 0.0016001098556444049,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 951640751.0,
|
|
"reward": 1.2776224613189697,
|
|
"reward_std": 0.12914247065782547,
|
|
"rewards/accuracy_reward": 0.5078125,
|
|
"rewards/batch_coverage_0": 0.5558313012123108,
|
|
"rewards/batch_coverage_1": 0.5558313012123108,
|
|
"rewards/batch_coverage_10": 0.6021793842315674,
|
|
"rewards/batch_coverage_15": 0.6080036878585815,
|
|
"rewards/batch_coverage_20": 0.6127823710441589,
|
|
"rewards/batch_coverage_25": 0.6156375765800476,
|
|
"rewards/batch_coverage_5": 0.5886821031570435,
|
|
"rewards/brier_reward": 0.8050480842590332,
|
|
"rewards/confidence_uniqueness_reward": 0.8560371041297913,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0025883075315505264,
|
|
"rewards/frontier_ece_reward": 0.012114399252459407,
|
|
"rewards/frontier_entropy_batch_reward": -0.5731969356536866,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.061328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.08608203381299973,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.7375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0306640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0306640625,
|
|
"signal/advantage_abs_mean": 0.0942740187048912,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0942740187048912,
|
|
"signal/advantage_pre_scale_std": 0.16601733565330506,
|
|
"signal/advantage_std": 0.16601733565330506,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.11676376014947891,
|
|
"signal/batch_coverage_0/group_std_mean": 0.15656563341617585,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.021875,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.011676376312971115,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.011676376312971115,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.11676376014947891,
|
|
"signal/batch_coverage_1/group_std_mean": 0.15656563341617585,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.021875,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.011676376312971115,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.011676376312971115,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.12755317836999894,
|
|
"signal/batch_coverage_10/group_std_mean": 0.17383061945438386,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.021875,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.012755317986011505,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.012755317986011505,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.12786179631948472,
|
|
"signal/batch_coverage_15/group_std_mean": 0.17495982646942138,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.021875,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.012786179967224597,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.012786179967224597,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.1294369652867317,
|
|
"signal/batch_coverage_20/group_std_mean": 0.1769184023141861,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.021875,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012943696603178978,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.012943696603178978,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.1298828110098839,
|
|
"signal/batch_coverage_25/group_std_mean": 0.17788674533367158,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.021875,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01298828087747097,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01298828087747097,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.12295575588941574,
|
|
"signal/batch_coverage_5/group_std_mean": 0.16745466887950897,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.021875,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012295575626194478,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.012295575626194478,
|
|
"signal/brier_reward/centered_abs_mean": 0.10006321221590042,
|
|
"signal/brier_reward/group_std_mean": 0.134315325319767,
|
|
"signal/brier_reward/group_zero_std_frac": 0.021875,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010006321221590042,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010006321221590042,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05508278608322144,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0718459963798523,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.021875,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00550827868282795,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00550827868282795,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001336067053489387,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0019159423885867,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.670083911449183e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.670083911449183e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01357954703271389,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.018551066890358923,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.034375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013579546939581633,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013579546939581633,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28496257662773133,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3574348032474518,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.13125,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028496256843209267,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028496256843209267,
|
|
"step": 280
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3279010208833354,
|
|
"calibration/batch_distribution_entropy": 0.7316795531083246,
|
|
"calibration/buffer_distribution_entropy": 0.7213093588529274,
|
|
"calibration/confidence_entropy": 0.2522811933581405,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.12012867647058822,
|
|
"calibration/coverage@15%": 0.18855085784313724,
|
|
"calibration/coverage@20%": 0.3635968137254902,
|
|
"calibration/coverage@25%": 0.4284742647058824,
|
|
"calibration/coverage@30%": 0.4957077205882353,
|
|
"calibration/coverage@5%": 0.05514093137254902,
|
|
"calibration/ece": 0.18393924576238366,
|
|
"calibration/mean_confidence": 0.5282364177652383,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 901.4,
|
|
"completions/max_terminated_length": 901.4,
|
|
"completions/mean_length": 219.38662109375,
|
|
"completions/mean_terminated_length": 219.49356689453126,
|
|
"completions/min_length": 39.8,
|
|
"completions/min_terminated_length": 103.0,
|
|
"epoch": 0.912,
|
|
"grad_norm": 0.002637675730511546,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0009,
|
|
"num_tokens": 968938566.0,
|
|
"reward": 1.3016446590423585,
|
|
"reward_std": 0.13964517116546632,
|
|
"rewards/accuracy_reward": 0.5302734375,
|
|
"rewards/batch_coverage_0": 0.5741848707199096,
|
|
"rewards/batch_coverage_1": 0.5741848707199096,
|
|
"rewards/batch_coverage_10": 0.617852532863617,
|
|
"rewards/batch_coverage_15": 0.6220511078834534,
|
|
"rewards/batch_coverage_20": 0.6254517316818238,
|
|
"rewards/batch_coverage_25": 0.6275116086006165,
|
|
"rewards/batch_coverage_5": 0.6016942858695984,
|
|
"rewards/brier_reward": 0.8188377737998962,
|
|
"rewards/confidence_uniqueness_reward": 0.8719737768173218,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.001976804086007178,
|
|
"rewards/frontier_ece_reward": 0.012473126128315925,
|
|
"rewards/frontier_entropy_batch_reward": -0.5779598355293274,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.064306640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.0910419762134552,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.715625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0321533203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0321533203125,
|
|
"signal/advantage_abs_mean": 0.10506602376699448,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10506602376699448,
|
|
"signal/advantage_pre_scale_std": 0.17485912442207335,
|
|
"signal/advantage_std": 0.17485912442207335,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.131300450861454,
|
|
"signal/batch_coverage_0/group_std_mean": 0.1741587519645691,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0125,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013130045123398304,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.013130045123398304,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.131300450861454,
|
|
"signal/batch_coverage_1/group_std_mean": 0.1741587519645691,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0125,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013130045123398304,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.013130045123398304,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.14013472199440002,
|
|
"signal/batch_coverage_10/group_std_mean": 0.18686334192752838,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0125,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.014013472571969032,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.014013472571969032,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.14249201416969298,
|
|
"signal/batch_coverage_15/group_std_mean": 0.19025346934795379,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0125,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.014249200746417046,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.014249200746417046,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.141669425368309,
|
|
"signal/batch_coverage_20/group_std_mean": 0.19006492495536803,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0125,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.014166942983865737,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.014166942983865737,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.14289966523647307,
|
|
"signal/batch_coverage_25/group_std_mean": 0.19121894836425782,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0125,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01428996678441763,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01428996678441763,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.13788548856973648,
|
|
"signal/batch_coverage_5/group_std_mean": 0.18307800889015197,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0125,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013788548670709133,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.013788548670709133,
|
|
"signal/brier_reward/centered_abs_mean": 0.11448466181755065,
|
|
"signal/brier_reward/group_std_mean": 0.1494537800550461,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011448466219007969,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011448466219007969,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04987005516886711,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.065058371424675,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0125,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0049870054237544535,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0049870054237544535,
|
|
"signal/format_reward/centered_abs_mean": 0.001123046875,
|
|
"signal/format_reward/group_std_mean": 0.0029782545287162067,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005615234375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0005615234375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0010798663133755326,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0016447607893496751,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.3498328917194158e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.3498328917194158e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014520647190511226,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01967911943793297,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.03125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014520647935569286,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014520647935569286,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2984996199607849,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36841660737991333,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.121875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029849962890148164,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029849962890148164,
|
|
"step": 285
|
|
},
|
|
{
|
|
"calibration/aurc": 0.34628107455417145,
|
|
"calibration/batch_distribution_entropy": 0.7617031711910593,
|
|
"calibration/buffer_distribution_entropy": 0.7273704508223597,
|
|
"calibration/confidence_entropy": 0.2782211951350248,
|
|
"calibration/coverage@0%": 0.003515625,
|
|
"calibration/coverage@1%": 0.003515625,
|
|
"calibration/coverage@10%": 0.09453125,
|
|
"calibration/coverage@15%": 0.124609375,
|
|
"calibration/coverage@20%": 0.220703125,
|
|
"calibration/coverage@25%": 0.296875,
|
|
"calibration/coverage@30%": 0.407421875,
|
|
"calibration/coverage@5%": 0.0484375,
|
|
"calibration/ece": 0.1588166385402925,
|
|
"calibration/mean_confidence": 0.5132860872825648,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 522.6,
|
|
"completions/max_terminated_length": 522.6,
|
|
"completions/mean_length": 214.753515625,
|
|
"completions/mean_terminated_length": 214.81558532714843,
|
|
"completions/min_length": 61.0,
|
|
"completions/min_terminated_length": 102.0,
|
|
"epoch": 0.928,
|
|
"grad_norm": 0.0018001672578975558,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0009,
|
|
"num_tokens": 986164458.0,
|
|
"reward": 1.2805254459381104,
|
|
"reward_std": 0.13228480517864227,
|
|
"rewards/accuracy_reward": 0.52255859375,
|
|
"rewards/batch_coverage_0": 0.5598244071006775,
|
|
"rewards/batch_coverage_1": 0.5598244071006775,
|
|
"rewards/batch_coverage_10": 0.5985792994499206,
|
|
"rewards/batch_coverage_15": 0.5995970368385315,
|
|
"rewards/batch_coverage_20": 0.6019830465316772,
|
|
"rewards/batch_coverage_25": 0.6030240416526794,
|
|
"rewards/batch_coverage_5": 0.5841894626617432,
|
|
"rewards/brier_reward": 0.798598051071167,
|
|
"rewards/confidence_uniqueness_reward": 0.878295361995697,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0020402401685714723,
|
|
"rewards/frontier_ece_reward": 0.010918319411575794,
|
|
"rewards/frontier_entropy_batch_reward": -0.6006525158882141,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.064691162109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.08760243952274323,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.7375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0323455810546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0323455810546875,
|
|
"signal/advantage_abs_mean": 0.09802084267139435,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09802084267139435,
|
|
"signal/advantage_pre_scale_std": 0.16889201402664183,
|
|
"signal/advantage_std": 0.16889201402664183,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.12150347679853439,
|
|
"signal/batch_coverage_0/group_std_mean": 0.16013022959232331,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012150347977876664,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.012150347977876664,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.12150347679853439,
|
|
"signal/batch_coverage_1/group_std_mean": 0.16013022959232331,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012150347977876664,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.012150347977876664,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.12959835082292556,
|
|
"signal/batch_coverage_10/group_std_mean": 0.17412001490592957,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01295983549207449,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.01295983549207449,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.12758624851703643,
|
|
"signal/batch_coverage_15/group_std_mean": 0.171616131067276,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.012758625112473965,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.012758625112473965,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.12791385054588317,
|
|
"signal/batch_coverage_20/group_std_mean": 0.17244038581848145,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012791384942829609,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.012791384942829609,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.12513090670108795,
|
|
"signal/batch_coverage_25/group_std_mean": 0.16936978101730346,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01251309122890234,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.01251309122890234,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1272565320134163,
|
|
"signal/batch_coverage_5/group_std_mean": 0.1697870075702667,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012725653126835823,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.012725653126835823,
|
|
"signal/brier_reward/centered_abs_mean": 0.10803148299455642,
|
|
"signal/brier_reward/group_std_mean": 0.14384068846702575,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010803148709237575,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010803148709237575,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04943007156252861,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06312261894345284,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004943007230758667,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004943007230758667,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0010793962515890597,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0016425697831436992,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.3492453581420704e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.3492453581420704e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014414687268435955,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.019503265619277954,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014414687175303698,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014414687175303698,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2935112476348877,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3679080069065094,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029351125285029412,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029351125285029412,
|
|
"step": 290
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24575393115665692,
|
|
"calibration/batch_distribution_entropy": 0.738798308780878,
|
|
"calibration/buffer_distribution_entropy": 0.7321804334521286,
|
|
"calibration/confidence_entropy": 0.26702329875816927,
|
|
"calibration/coverage@0%": 0.067578125,
|
|
"calibration/coverage@1%": 0.074609375,
|
|
"calibration/coverage@10%": 0.281640625,
|
|
"calibration/coverage@15%": 0.35234375,
|
|
"calibration/coverage@20%": 0.448828125,
|
|
"calibration/coverage@25%": 0.537109375,
|
|
"calibration/coverage@30%": 0.61875,
|
|
"calibration/coverage@5%": 0.2203125,
|
|
"calibration/ece": 0.14126106139507846,
|
|
"calibration/mean_confidence": 0.4586294707677744,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 518.8,
|
|
"completions/max_terminated_length": 518.8,
|
|
"completions/mean_length": 220.90615234375,
|
|
"completions/mean_terminated_length": 220.90615234375,
|
|
"completions/min_length": 107.6,
|
|
"completions/min_terminated_length": 107.6,
|
|
"epoch": 0.944,
|
|
"grad_norm": 0.0014223521575331688,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 1003401961.0,
|
|
"reward": 1.2642857074737548,
|
|
"reward_std": 0.1236835777759552,
|
|
"rewards/accuracy_reward": 0.5099609375,
|
|
"rewards/batch_coverage_0": 0.5456533968448639,
|
|
"rewards/batch_coverage_1": 0.5456533968448639,
|
|
"rewards/batch_coverage_10": 0.5796523928642273,
|
|
"rewards/batch_coverage_15": 0.5821020603179932,
|
|
"rewards/batch_coverage_20": 0.5851274251937866,
|
|
"rewards/batch_coverage_25": 0.5893722891807556,
|
|
"rewards/batch_coverage_5": 0.5687803268432617,
|
|
"rewards/brier_reward": 0.8048572063446044,
|
|
"rewards/confidence_uniqueness_reward": 0.8740989685058593,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0016524946317076684,
|
|
"rewards/frontier_ece_reward": 0.01148709300905466,
|
|
"rewards/frontier_entropy_batch_reward": -0.5935260772705078,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07744140625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10488596558570862,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.690625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.038720703125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.038720703125,
|
|
"signal/advantage_abs_mean": 0.09334558099508286,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09334558099508286,
|
|
"signal/advantage_pre_scale_std": 0.15849049091339112,
|
|
"signal/advantage_std": 0.15849049091339112,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.11165303438901901,
|
|
"signal/batch_coverage_0/group_std_mean": 0.14861661791801453,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.011165303364396095,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.011165303364396095,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.11165303438901901,
|
|
"signal/batch_coverage_1/group_std_mean": 0.14861661791801453,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.011165303364396095,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.011165303364396095,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.121368607878685,
|
|
"signal/batch_coverage_10/group_std_mean": 0.16234832406044006,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.012136861123144627,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.012136861123144627,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.12064623832702637,
|
|
"signal/batch_coverage_15/group_std_mean": 0.16168377697467803,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.012064623646438122,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.012064623646438122,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.11667853146791458,
|
|
"signal/batch_coverage_20/group_std_mean": 0.1575889140367508,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.011667853407561778,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.011667853407561778,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.12033923119306564,
|
|
"signal/batch_coverage_25/group_std_mean": 0.16211841106414795,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012033923342823983,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.012033923342823983,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.11773080676794052,
|
|
"signal/batch_coverage_5/group_std_mean": 0.15715883374214173,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.011773080937564373,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.011773080937564373,
|
|
"signal/brier_reward/centered_abs_mean": 0.10707180798053742,
|
|
"signal/brier_reward/group_std_mean": 0.13771842420101166,
|
|
"signal/brier_reward/group_zero_std_frac": 0.009375,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010707181133329868,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010707181133329868,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.052157020568847655,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06535054147243499,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.009375,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005215702205896377,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005215702205896377,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0006664380664005876,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.000960760226007551,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.330475884577026e-06,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.330475884577026e-06,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014187652990221978,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.018812255188822746,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014187653549015522,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014187653549015522,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2870850205421448,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36086881160736084,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1125,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028708503022789954,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028708503022789954,
|
|
"step": 295
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29138843660168645,
|
|
"calibration/batch_distribution_entropy": 0.6600876061573718,
|
|
"calibration/buffer_distribution_entropy": 0.734922833534972,
|
|
"calibration/confidence_entropy": 0.21770267843701943,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.080078125,
|
|
"calibration/coverage@15%": 0.323828125,
|
|
"calibration/coverage@20%": 0.360546875,
|
|
"calibration/coverage@25%": 0.498828125,
|
|
"calibration/coverage@30%": 0.5578125,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.18084708634125093,
|
|
"calibration/mean_confidence": 0.5571085219541783,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 558.8,
|
|
"completions/max_terminated_length": 558.8,
|
|
"completions/mean_length": 227.0599609375,
|
|
"completions/mean_terminated_length": 227.1940704345703,
|
|
"completions/min_length": 21.0,
|
|
"completions/min_terminated_length": 104.6,
|
|
"epoch": 0.96,
|
|
"grad_norm": 0.0019881308544427156,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0,
|
|
"num_tokens": 1020667375.0,
|
|
"reward": 1.2900043249130249,
|
|
"reward_std": 0.12938351929187775,
|
|
"rewards/accuracy_reward": 0.51416015625,
|
|
"rewards/batch_coverage_0": 0.5834320783615112,
|
|
"rewards/batch_coverage_1": 0.5834320783615112,
|
|
"rewards/batch_coverage_10": 0.6152560353279114,
|
|
"rewards/batch_coverage_15": 0.6167209267616272,
|
|
"rewards/batch_coverage_20": 0.6183749675750733,
|
|
"rewards/batch_coverage_25": 0.6207085251808167,
|
|
"rewards/batch_coverage_5": 0.5995626091957093,
|
|
"rewards/brier_reward": 0.8106549620628357,
|
|
"rewards/confidence_uniqueness_reward": 0.8551827192306518,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.002351419860497117,
|
|
"rewards/frontier_ece_reward": 0.012325448356568814,
|
|
"rewards/frontier_entropy_batch_reward": -0.5826962471008301,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.068255615234375,
|
|
"signal/accuracy_reward/group_std_mean": 0.09154722243547439,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.73125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0341278076171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0341278076171875,
|
|
"signal/advantage_abs_mean": 0.09666871875524521,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09666871875524521,
|
|
"signal/advantage_pre_scale_std": 0.16768636405467988,
|
|
"signal/advantage_std": 0.16768636405467988,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.12134916931390763,
|
|
"signal/batch_coverage_0/group_std_mean": 0.1602302074432373,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012134917080402374,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.012134917080402374,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.12134916931390763,
|
|
"signal/batch_coverage_1/group_std_mean": 0.1602302074432373,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012134917080402374,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.012134917080402374,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.1270793542265892,
|
|
"signal/batch_coverage_10/group_std_mean": 0.16881133019924163,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.012707936018705368,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.012707936018705368,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.12548649162054062,
|
|
"signal/batch_coverage_15/group_std_mean": 0.16757002770900725,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01254864949733019,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.01254864949733019,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.1269705682992935,
|
|
"signal/batch_coverage_20/group_std_mean": 0.1693786710500717,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012697057239711285,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.012697057239711285,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.1278000921010971,
|
|
"signal/batch_coverage_25/group_std_mean": 0.17063870429992675,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012780009768903255,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.012780009768903255,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.12397996038198471,
|
|
"signal/batch_coverage_5/group_std_mean": 0.1644669473171234,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.003125,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012397996336221694,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.012397996336221694,
|
|
"signal/brier_reward/centered_abs_mean": 0.10672413557767868,
|
|
"signal/brier_reward/group_std_mean": 0.13898140490055083,
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010672413557767869,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010672413557767869,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0589519664645195,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07484492361545562,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.003125,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005895196599885822,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005895196599885822,
|
|
"signal/format_reward/centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/group_std_mean": 0.0038669900968670845,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011933622765354813,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0016293063759803771,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.4917028420313726e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.4917028420313726e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.013726240582764148,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.018288495391607283,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013726240722462535,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013726240722462535,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27692450284957887,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35284039974212644,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.109375,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027692450582981108,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027692450582981108,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"eval_calibration/aurc": 0.43738341336907305,
|
|
"eval_calibration/batch_distribution_entropy": 0.6284206643934745,
|
|
"eval_calibration/buffer_distribution_entropy": 0.7325220520469002,
|
|
"eval_calibration/confidence_entropy": 0.20829797554021534,
|
|
"eval_calibration/coverage@0%": 0.0,
|
|
"eval_calibration/coverage@1%": 0.0,
|
|
"eval_calibration/coverage@10%": 0.0,
|
|
"eval_calibration/coverage@15%": 0.0,
|
|
"eval_calibration/coverage@20%": 0.3125,
|
|
"eval_calibration/coverage@25%": 0.328125,
|
|
"eval_calibration/coverage@30%": 0.34375,
|
|
"eval_calibration/coverage@5%": 0.0,
|
|
"eval_calibration/ece": 0.20566188520005838,
|
|
"eval_calibration/mean_confidence": 0.5225051785518035,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 439.5,
|
|
"eval_completions/max_terminated_length": 439.5,
|
|
"eval_completions/mean_length": 233.93871307373047,
|
|
"eval_completions/mean_terminated_length": 233.93871307373047,
|
|
"eval_completions/min_length": 117.5,
|
|
"eval_completions/min_terminated_length": 117.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 1020667375.0,
|
|
"eval_reward": 0.9656747579574585,
|
|
"eval_reward_std": 0.26930802315473557,
|
|
"eval_rewards/accuracy_reward": 0.427734375,
|
|
"eval_rewards/batch_coverage_0": 0.28614822030067444,
|
|
"eval_rewards/batch_coverage_1": 0.28614822030067444,
|
|
"eval_rewards/batch_coverage_10": 0.28418153524398804,
|
|
"eval_rewards/batch_coverage_15": 0.27920960262417793,
|
|
"eval_rewards/batch_coverage_20": 0.24274297058582306,
|
|
"eval_rewards/batch_coverage_25": 0.24443916976451874,
|
|
"eval_rewards/batch_coverage_5": 0.28614822030067444,
|
|
"eval_rewards/brier_reward": 0.7955427914857864,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.803955078125,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.003532207338139415,
|
|
"eval_rewards/frontier_ece_reward": 0.010001325979828835,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 22.3192,
|
|
"eval_samples_per_second": 22.402,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4737548828125,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49411213397979736,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23687744140625,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23687744140625,
|
|
"eval_signal/advantage_abs_mean": 0.2209746055305004,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.2209746055305004,
|
|
"eval_signal/advantage_pre_scale_std": 0.2672167122364044,
|
|
"eval_signal/advantage_std": 0.2672167122364044,
|
|
"eval_signal/batch_coverage_0/centered_abs_mean": 0.42352383583784103,
|
|
"eval_signal/batch_coverage_0/group_std_mean": 0.48895537108182907,
|
|
"eval_signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04235238581895828,
|
|
"eval_signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.04235238581895828,
|
|
"eval_signal/batch_coverage_1/centered_abs_mean": 0.42352383583784103,
|
|
"eval_signal/batch_coverage_1/group_std_mean": 0.48895537108182907,
|
|
"eval_signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04235238581895828,
|
|
"eval_signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.04235238581895828,
|
|
"eval_signal/batch_coverage_10/centered_abs_mean": 0.42084096372127533,
|
|
"eval_signal/batch_coverage_10/group_std_mean": 0.4858129918575287,
|
|
"eval_signal/batch_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04208409693092108,
|
|
"eval_signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.04208409693092108,
|
|
"eval_signal/batch_coverage_15/centered_abs_mean": 0.4104515314102173,
|
|
"eval_signal/batch_coverage_15/group_std_mean": 0.4733032360672951,
|
|
"eval_signal/batch_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04104515444487333,
|
|
"eval_signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.04104515444487333,
|
|
"eval_signal/batch_coverage_20/centered_abs_mean": 0.3303024247288704,
|
|
"eval_signal/batch_coverage_20/group_std_mean": 0.38545073568820953,
|
|
"eval_signal/batch_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03303024219349027,
|
|
"eval_signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.03303024219349027,
|
|
"eval_signal/batch_coverage_25/centered_abs_mean": 0.33572477102279663,
|
|
"eval_signal/batch_coverage_25/group_std_mean": 0.38896334171295166,
|
|
"eval_signal/batch_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03357247728854418,
|
|
"eval_signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.03357247728854418,
|
|
"eval_signal/batch_coverage_5/centered_abs_mean": 0.42352383583784103,
|
|
"eval_signal/batch_coverage_5/group_std_mean": 0.48895537108182907,
|
|
"eval_signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04235238581895828,
|
|
"eval_signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.04235238581895828,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2786501497030258,
|
|
"eval_signal/brier_reward/group_std_mean": 0.34969785809516907,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02786501543596387,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02786501543596387,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.111419677734375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1263575330376625,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011141967726871371,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011141967726871371,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004553930484689772,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.00847341027110815,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.692413469660096e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.692413469660096e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.022189988754689693,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.028291028458625078,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002218998968601227,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002218998968601227,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.179,
|
|
"step": 300
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2495020566812151,
|
|
"calibration/batch_distribution_entropy": 0.6821244193032813,
|
|
"calibration/buffer_distribution_entropy": 0.7306995672624862,
|
|
"calibration/confidence_entropy": 0.21918015091457263,
|
|
"calibration/coverage@0%": 0.008984375,
|
|
"calibration/coverage@1%": 0.008984375,
|
|
"calibration/coverage@10%": 0.113671875,
|
|
"calibration/coverage@15%": 0.46630228718199607,
|
|
"calibration/coverage@20%": 0.5499304366438356,
|
|
"calibration/coverage@25%": 0.6015204562133072,
|
|
"calibration/coverage@30%": 0.6527129708904109,
|
|
"calibration/coverage@5%": 0.0890625,
|
|
"calibration/ece": 0.16477880565297598,
|
|
"calibration/mean_confidence": 0.6071220864675294,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 664.2,
|
|
"completions/max_terminated_length": 664.2,
|
|
"completions/mean_length": 232.00810546875,
|
|
"completions/mean_terminated_length": 232.14267272949218,
|
|
"completions/min_length": 42.2,
|
|
"completions/min_terminated_length": 107.4,
|
|
"epoch": 0.976,
|
|
"grad_norm": 0.0017184000462293625,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 1037904258.0,
|
|
"reward": 1.3052581071853637,
|
|
"reward_std": 0.12845401763916015,
|
|
"rewards/accuracy_reward": 0.5328125,
|
|
"rewards/batch_coverage_0": 0.5998408555984497,
|
|
"rewards/batch_coverage_1": 0.5998408555984497,
|
|
"rewards/batch_coverage_10": 0.6285971403121948,
|
|
"rewards/batch_coverage_15": 0.6305443048477173,
|
|
"rewards/batch_coverage_20": 0.6343398213386535,
|
|
"rewards/batch_coverage_25": 0.6352273762226105,
|
|
"rewards/batch_coverage_5": 0.615449583530426,
|
|
"rewards/brier_reward": 0.8177313566207886,
|
|
"rewards/confidence_uniqueness_reward": 0.8184030890464783,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.0027989137917757033,
|
|
"rewards/frontier_ece_reward": 0.012824122048914432,
|
|
"rewards/frontier_entropy_batch_reward": -0.6000243067741394,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07099609375,
|
|
"signal/accuracy_reward/group_std_mean": 0.0958763062953949,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.721875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.035498046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.035498046875,
|
|
"signal/advantage_abs_mean": 0.09721664190292359,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09721664190292359,
|
|
"signal/advantage_pre_scale_std": 0.16938933432102204,
|
|
"signal/advantage_std": 0.16938933432102204,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.121105095744133,
|
|
"signal/batch_coverage_0/group_std_mean": 0.16181692481040955,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.025,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012110509909689426,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.012110509909689426,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.121105095744133,
|
|
"signal/batch_coverage_1/group_std_mean": 0.16181692481040955,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.025,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012110509909689426,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.012110509909689426,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.12876548916101455,
|
|
"signal/batch_coverage_10/group_std_mean": 0.17223238348960876,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.025,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.012876549735665321,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.012876549735665321,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.13056999146938325,
|
|
"signal/batch_coverage_15/group_std_mean": 0.17412794828414918,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.025,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013056999444961548,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.013056999444961548,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.13263879269361495,
|
|
"signal/batch_coverage_20/group_std_mean": 0.17653416395187377,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.025,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013263879343867302,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.013263879343867302,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.13004723638296128,
|
|
"signal/batch_coverage_25/group_std_mean": 0.17350882589817046,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.025,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013004723750054837,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.013004723750054837,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1247675284743309,
|
|
"signal/batch_coverage_5/group_std_mean": 0.16621632874011993,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.025,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012476752884685994,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.012476752884685994,
|
|
"signal/brier_reward/centered_abs_mean": 0.10762623548507691,
|
|
"signal/brier_reward/group_std_mean": 0.14009990394115449,
|
|
"signal/brier_reward/group_zero_std_frac": 0.025,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010762624442577362,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010762624442577362,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07212853208184242,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09378160238265991,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.025,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007212853524833918,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007212853524833918,
|
|
"signal/format_reward/centered_abs_mean": 0.00142822265625,
|
|
"signal/format_reward/group_std_mean": 0.002923433808609843,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000714111328125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000714111328125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014352105092257262,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0020134341903030872,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.794013242033543e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.794013242033543e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.013121502846479416,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01770468596369028,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.06875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013121502939611672,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013121502939611672,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27344033122062683,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3469135522842407,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.171875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02734403349459171,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02734403349459171,
|
|
"step": 305
|
|
},
|
|
{
|
|
"calibration/aurc": 0.36944233718005665,
|
|
"calibration/batch_distribution_entropy": 0.6843551315294027,
|
|
"calibration/buffer_distribution_entropy": 0.729047477608188,
|
|
"calibration/confidence_entropy": 0.21484423873154287,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.031640625,
|
|
"calibration/coverage@15%": 0.08125,
|
|
"calibration/coverage@20%": 0.1671875,
|
|
"calibration/coverage@25%": 0.355078125,
|
|
"calibration/coverage@30%": 0.440625,
|
|
"calibration/coverage@5%": 0.031640625,
|
|
"calibration/ece": 0.21733653989218463,
|
|
"calibration/mean_confidence": 0.5253131950893684,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 743.0,
|
|
"completions/max_terminated_length": 743.0,
|
|
"completions/mean_length": 229.2525390625,
|
|
"completions/mean_terminated_length": 229.2525390625,
|
|
"completions/min_length": 102.8,
|
|
"completions/min_terminated_length": 102.8,
|
|
"epoch": 0.992,
|
|
"grad_norm": 0.0018200714839622378,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.001,
|
|
"num_tokens": 1055380284.0,
|
|
"reward": 1.2748222351074219,
|
|
"reward_std": 0.12597642987966537,
|
|
"rewards/accuracy_reward": 0.5037109375,
|
|
"rewards/batch_coverage_0": 0.5769623160362244,
|
|
"rewards/batch_coverage_1": 0.5769623160362244,
|
|
"rewards/batch_coverage_10": 0.6048051595687867,
|
|
"rewards/batch_coverage_15": 0.6059326887130737,
|
|
"rewards/batch_coverage_20": 0.6096433520317077,
|
|
"rewards/batch_coverage_25": 0.6107012748718261,
|
|
"rewards/batch_coverage_5": 0.5928257584571839,
|
|
"rewards/brier_reward": 0.7898470520973205,
|
|
"rewards/confidence_uniqueness_reward": 0.8456288576126099,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0027298410423099995,
|
|
"rewards/frontier_ece_reward": 0.01039378009736538,
|
|
"rewards/frontier_entropy_batch_reward": -0.5927170753479004,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.06654052734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.08761050999164581,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.75,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.033270263671875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.033270263671875,
|
|
"signal/advantage_abs_mean": 0.09469048231840134,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09469048231840134,
|
|
"signal/advantage_pre_scale_std": 0.16549064517021178,
|
|
"signal/advantage_std": 0.16549064517021178,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.1224028378725052,
|
|
"signal/batch_coverage_0/group_std_mean": 0.16056158244609833,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012240284122526646,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.012240284122526646,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.1224028378725052,
|
|
"signal/batch_coverage_1/group_std_mean": 0.16056158244609833,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012240284122526646,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.012240284122526646,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.12704037725925446,
|
|
"signal/batch_coverage_10/group_std_mean": 0.16898048520088196,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.012704038433730603,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.012704038433730603,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.12583906352519988,
|
|
"signal/batch_coverage_15/group_std_mean": 0.16771935522556305,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.012583906389772892,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.012583906389772892,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.12735906690359117,
|
|
"signal/batch_coverage_20/group_std_mean": 0.17019020318984984,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01273590698838234,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.01273590698838234,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.12817565351724625,
|
|
"signal/batch_coverage_25/group_std_mean": 0.1711154282093048,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012817565724253655,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.012817565724253655,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.12552004754543306,
|
|
"signal/batch_coverage_5/group_std_mean": 0.16624825298786164,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.009375,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012552005238831043,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.012552005238831043,
|
|
"signal/brier_reward/centered_abs_mean": 0.10650904327630997,
|
|
"signal/brier_reward/group_std_mean": 0.13855279684066774,
|
|
"signal/brier_reward/group_zero_std_frac": 0.009375,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010650904849171638,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010650904849171638,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06525479182600975,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08299526795744896,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.009375,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0065254792105406524,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0065254792105406524,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015120732598006726,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0020327494712546468,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.890091589302756e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.890091589302756e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01312310602515936,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0178884819149971,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001312310597859323,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001312310597859323,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28517351746559144,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3595928966999054,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.134375,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028517350926995276,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028517350926995276,
|
|
"step": 310
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2572230256137267,
|
|
"calibration/batch_distribution_entropy": 0.6403200916464951,
|
|
"calibration/buffer_distribution_entropy": 0.727542698126519,
|
|
"calibration/confidence_entropy": 0.20421884610236768,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0390625,
|
|
"calibration/coverage@15%": 0.1025390625,
|
|
"calibration/coverage@20%": 0.30078125,
|
|
"calibration/coverage@25%": 0.599609375,
|
|
"calibration/coverage@30%": 0.734375,
|
|
"calibration/coverage@5%": 0.0390625,
|
|
"calibration/ece": 0.18167404731060102,
|
|
"calibration/mean_confidence": 0.6568276885579987,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00031887755102039117,
|
|
"completions/max_length": 508.0,
|
|
"completions/max_terminated_length": 508.0,
|
|
"completions/mean_length": 228.7561492919922,
|
|
"completions/mean_terminated_length": 228.83031463623047,
|
|
"completions/min_length": 55.5,
|
|
"completions/min_terminated_length": 106.0,
|
|
"epoch": 0.9984,
|
|
"num_tokens": 1062317684.0,
|
|
"reward": 1.2654950618743896,
|
|
"reward_std": 0.13095472753047943,
|
|
"rewards/accuracy_reward": 0.535400390625,
|
|
"rewards/batch_coverage_0": 0.5433026552200317,
|
|
"rewards/batch_coverage_1": 0.5433026552200317,
|
|
"rewards/batch_coverage_10": 0.5609183311462402,
|
|
"rewards/batch_coverage_15": 0.5650316774845123,
|
|
"rewards/batch_coverage_20": 0.5628845393657684,
|
|
"rewards/batch_coverage_25": 0.5658237636089325,
|
|
"rewards/batch_coverage_5": 0.5589036047458649,
|
|
"rewards/brier_reward": 0.7723598182201385,
|
|
"rewards/confidence_uniqueness_reward": 0.8834567368030548,
|
|
"rewards/format_reward": 0.999755859375,
|
|
"rewards/frontier_aurc_reward": -0.002115529146976769,
|
|
"rewards/frontier_ece_reward": 0.01011998625472188,
|
|
"rewards/frontier_entropy_batch_reward": -0.586669921875,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0801849365234375,
|
|
"signal/accuracy_reward/group_std_mean": 0.10365201532840729,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.7109375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04009246826171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04009246826171875,
|
|
"signal/advantage_abs_mean": 0.10032283142209053,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10032283142209053,
|
|
"signal/advantage_pre_scale_std": 0.16637051850557327,
|
|
"signal/advantage_std": 0.16637051850557327,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.1333833485841751,
|
|
"signal/batch_coverage_0/group_std_mean": 0.17518048733472824,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0078125,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013338335324078798,
|
|
"signal/batch_coverage_0/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.013338335324078798,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.1333833485841751,
|
|
"signal/batch_coverage_1/group_std_mean": 0.17518048733472824,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0078125,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013338335324078798,
|
|
"signal/batch_coverage_1/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.013338335324078798,
|
|
"signal/batch_coverage_10/centered_abs_mean": 0.13491075485944748,
|
|
"signal/batch_coverage_10/group_std_mean": 0.17798280715942383,
|
|
"signal/batch_coverage_10/group_zero_std_frac": 0.0078125,
|
|
"signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013491075951606035,
|
|
"signal/batch_coverage_10/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_10/weighted_centered_abs_mean": 0.013491075951606035,
|
|
"signal/batch_coverage_15/centered_abs_mean": 0.13440050929784775,
|
|
"signal/batch_coverage_15/group_std_mean": 0.1781018227338791,
|
|
"signal/batch_coverage_15/group_zero_std_frac": 0.0078125,
|
|
"signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01344005111604929,
|
|
"signal/batch_coverage_15/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_15/weighted_centered_abs_mean": 0.01344005111604929,
|
|
"signal/batch_coverage_20/centered_abs_mean": 0.13126233220100403,
|
|
"signal/batch_coverage_20/group_std_mean": 0.1750546544790268,
|
|
"signal/batch_coverage_20/group_zero_std_frac": 0.0078125,
|
|
"signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013126233592629433,
|
|
"signal/batch_coverage_20/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_20/weighted_centered_abs_mean": 0.013126233592629433,
|
|
"signal/batch_coverage_25/centered_abs_mean": 0.1286875493824482,
|
|
"signal/batch_coverage_25/group_std_mean": 0.17355988919734955,
|
|
"signal/batch_coverage_25/group_zero_std_frac": 0.0078125,
|
|
"signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012868755962699652,
|
|
"signal/batch_coverage_25/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_25/weighted_centered_abs_mean": 0.012868755962699652,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.13534656167030334,
|
|
"signal/batch_coverage_5/group_std_mean": 0.17881041020154953,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0078125,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013534656260162592,
|
|
"signal/batch_coverage_5/weight": 0.10000000149011612,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.013534656260162592,
|
|
"signal/brier_reward/centered_abs_mean": 0.11168951913714409,
|
|
"signal/brier_reward/group_std_mean": 0.1451142355799675,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0078125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011168952565640211,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011168952565640211,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.050195975229144096,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06316389329731464,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.015625,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005019597476348281,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005019597476348281,
|
|
"signal/format_reward/centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/group_std_mean": 0.0013810679083690047,
|
|
"signal/format_reward/group_zero_std_frac": 0.9921875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00023651123046875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00023651123046875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011146239121444523,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0015803179703652859,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.3932799447502475e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.3932799447502475e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014482187572866678,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01911458931863308,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0234375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001448218827135861,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001448218827135861,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2982959598302841,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36800524592399597,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1484375,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029829598031938076,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029829598031938076,
|
|
"step": 312,
|
|
"total_flos": 0.0,
|
|
"train_loss": -0.0003725691671999625,
|
|
"train_runtime": 61071.055,
|
|
"train_samples_per_second": 0.327,
|
|
"train_steps_per_second": 0.005
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 312,
|
|
"num_input_tokens_seen": 1062317684,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|