5724 lines
354 KiB
JSON
5724 lines
354 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.49919376007799904,
|
|
"eval_steps": 50,
|
|
"global_step": 208,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.47117773098793353,
|
|
"calibration/batch_distribution_entropy": 0.2859162943204464,
|
|
"calibration/buffer_distribution_entropy": 0.29128966207738405,
|
|
"calibration/confidence_entropy": 0.22040658035376542,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4490633383918142,
|
|
"calibration/mean_confidence": 0.9153706387891617,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.019357638888888907,
|
|
"completions/max_length": 3998.4,
|
|
"completions/max_terminated_length": 3998.4,
|
|
"completions/mean_length": 516.5592895507813,
|
|
"completions/mean_terminated_length": 526.74970703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.011999850001874977,
|
|
"grad_norm": 0.004050114192068577,
|
|
"learning_rate": 5.952380952380953e-07,
|
|
"loss": 0.0035,
|
|
"num_tokens": 9064971.0,
|
|
"reward": 0.5567546248435974,
|
|
"reward_std": 0.4937511146068573,
|
|
"rewards/accuracy_reward": 0.2719618022441864,
|
|
"rewards/brier_reward": 0.32214988470077516,
|
|
"rewards/confidence_uniqueness_reward": 0.2880812108516693,
|
|
"rewards/format_reward": 0.6037326335906983,
|
|
"rewards/frontier_aurc_reward": 0.16993313934653997,
|
|
"rewards/frontier_coverage_1": 0.1746483048889786,
|
|
"rewards/frontier_coverage_10": 0.1746483048889786,
|
|
"rewards/frontier_coverage_15": 0.1746483048889786,
|
|
"rewards/frontier_coverage_20": 0.1746483048889786,
|
|
"rewards/frontier_coverage_25": 0.1746483048889786,
|
|
"rewards/frontier_coverage_5": 0.1746483048889786,
|
|
"rewards/frontier_ece_reward": 0.16663597179576756,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.3148274779319763,
|
|
"signal/accuracy_reward/group_std_mean": 0.3746787905693054,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.07500000149011612,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15741373896598815,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15741373896598815,
|
|
"signal/advantage_abs_mean": 0.42672874331474303,
|
|
"signal/advantage_pre_scale_abs_mean": 0.42672874331474303,
|
|
"signal/advantage_pre_scale_std": 0.49877622723579407,
|
|
"signal/advantage_std": 0.49877622723579407,
|
|
"signal/brier_reward/centered_abs_mean": 0.3234766721725464,
|
|
"signal/brier_reward/group_std_mean": 0.37651577591896057,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0404345840215683,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0404345840215683,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2356457978487015,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2879685640335083,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029455724731087686,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.029455724731087686,
|
|
"signal/format_reward/centered_abs_mean": 0.43563910126686095,
|
|
"signal/format_reward/group_std_mean": 0.4719055533409119,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.21781955063343048,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.21781955063343048,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.19491605628281833,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.22967487033456563,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0034889972681412472,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0034889972681412472,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1973324902355671,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23634177595376968,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035322514304425567,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035322514304425567,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1973324902355671,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23634177595376968,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035322514304425567,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035322514304425567,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1973324902355671,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23634177595376968,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035322514304425567,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035322514304425567,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1973324902355671,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23634177595376968,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035322514304425567,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035322514304425567,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1973324902355671,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23634177595376968,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035322514304425567,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035322514304425567,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1973324902355671,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23634177595376968,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035322514304425567,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035322514304425567,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.2454825758934021,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.2889118641614914,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030685321986675264,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030685321986675264,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4703947084269996,
|
|
"calibration/batch_distribution_entropy": 0.2489212385632244,
|
|
"calibration/buffer_distribution_entropy": 0.2842347502223941,
|
|
"calibration/confidence_entropy": 0.222495853565373,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4272499701997674,
|
|
"calibration/mean_confidence": 0.9193827270131825,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017795138888888885,
|
|
"completions/max_length": 4050.8,
|
|
"completions/max_terminated_length": 4050.8,
|
|
"completions/mean_length": 477.50877685546874,
|
|
"completions/mean_terminated_length": 486.2249816894531,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 15.0,
|
|
"epoch": 0.023999700003749954,
|
|
"grad_norm": 0.004755768924951553,
|
|
"learning_rate": 1.1904761904761906e-06,
|
|
"loss": 0.0049,
|
|
"num_tokens": 17648592.0,
|
|
"reward": 0.5936285734176636,
|
|
"reward_std": 0.42507994174957275,
|
|
"rewards/accuracy_reward": 0.3043402791023254,
|
|
"rewards/brier_reward": 0.36583648920059203,
|
|
"rewards/confidence_uniqueness_reward": 0.35002759099006653,
|
|
"rewards/format_reward": 0.7091145873069763,
|
|
"rewards/frontier_aurc_reward": -0.006095233652740717,
|
|
"rewards/frontier_coverage_1": 0.005141327064484358,
|
|
"rewards/frontier_coverage_10": 0.005141327064484358,
|
|
"rewards/frontier_coverage_15": 0.005141327064484358,
|
|
"rewards/frontier_coverage_20": 0.005141327064484358,
|
|
"rewards/frontier_coverage_25": 0.005141327064484358,
|
|
"rewards/frontier_coverage_5": 0.005141327064484358,
|
|
"rewards/frontier_ece_reward": -0.02419957034289837,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.3331814229488373,
|
|
"signal/accuracy_reward/group_std_mean": 0.38797804713249207,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.08055555745959282,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16659071147441865,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.16659071147441865,
|
|
"signal/advantage_abs_mean": 0.3555244505405426,
|
|
"signal/advantage_pre_scale_abs_mean": 0.3555244505405426,
|
|
"signal/advantage_pre_scale_std": 0.42819578647613527,
|
|
"signal/advantage_std": 0.42819578647613527,
|
|
"signal/brier_reward/centered_abs_mean": 0.3271039307117462,
|
|
"signal/brier_reward/group_std_mean": 0.3775700032711029,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.040887991338968276,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.040887991338968276,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.22568395733833313,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2805815994739532,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02821049466729164,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02821049466729164,
|
|
"signal/format_reward/centered_abs_mean": 0.36475151777267456,
|
|
"signal/format_reward/group_std_mean": 0.4275496780872345,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.18237575888633728,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.18237575888633728,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.006412158068269491,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.008681737259030342,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.000114777623093687,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.000114777623093687,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.015101977251470089,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.03188966251909733,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00027032537909690293,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00027032537909690293,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.015101977251470089,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.03188966251909733,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00027032537909690293,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00027032537909690293,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.015101977251470089,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.03188966251909733,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00027032537909690293,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00027032537909690293,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.015101977251470089,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.03188966251909733,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00027032537909690293,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00027032537909690293,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.015101977251470089,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.03188966251909733,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00027032537909690293,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00027032537909690293,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.015101977251470089,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.03188966251909733,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00027032537909690293,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00027032537909690293,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.1461539089679718,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.1712912440299988,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.018269238620996477,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.018269238620996477,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5425230736344001,
|
|
"calibration/batch_distribution_entropy": 0.2866005839972722,
|
|
"calibration/buffer_distribution_entropy": 0.27346687050298973,
|
|
"calibration/confidence_entropy": 0.24153054751308325,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.5060684725221445,
|
|
"calibration/mean_confidence": 0.9123262424801549,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.012586805555555558,
|
|
"completions/max_length": 3972.0,
|
|
"completions/max_terminated_length": 3972.0,
|
|
"completions/mean_length": 429.3203125,
|
|
"completions/mean_terminated_length": 434.8564453125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.03599955000562493,
|
|
"grad_norm": 0.0030024258885532618,
|
|
"learning_rate": 1.7857142857142859e-06,
|
|
"loss": -0.0073,
|
|
"num_tokens": 25696346.0,
|
|
"reward": 0.7226181745529174,
|
|
"reward_std": 0.328831684589386,
|
|
"rewards/accuracy_reward": 0.32343749403953553,
|
|
"rewards/brier_reward": 0.4225196659564972,
|
|
"rewards/confidence_uniqueness_reward": 0.4882237255573273,
|
|
"rewards/format_reward": 0.909375011920929,
|
|
"rewards/frontier_aurc_reward": -0.007289860583841801,
|
|
"rewards/frontier_coverage_1": 0.008264282252639532,
|
|
"rewards/frontier_coverage_10": 0.008264282252639532,
|
|
"rewards/frontier_coverage_15": 0.008264282252639532,
|
|
"rewards/frontier_coverage_20": 0.008264282252639532,
|
|
"rewards/frontier_coverage_25": 0.008264282252639532,
|
|
"rewards/frontier_coverage_5": 0.008264282252639532,
|
|
"rewards/frontier_ece_reward": -0.06710481271147728,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.32534722089767454,
|
|
"signal/accuracy_reward/group_std_mean": 0.38704482316970823,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.05000000149011612,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16267361044883727,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.16267361044883727,
|
|
"signal/advantage_abs_mean": 0.2616334229707718,
|
|
"signal/advantage_pre_scale_abs_mean": 0.2616334229707718,
|
|
"signal/advantage_pre_scale_std": 0.3352875530719757,
|
|
"signal/advantage_std": 0.3352875530719757,
|
|
"signal/brier_reward/centered_abs_mean": 0.302908456325531,
|
|
"signal/brier_reward/group_std_mean": 0.356386786699295,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03786355704069137,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03786355704069137,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.19381275475025178,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.24320927560329436,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024226594343781473,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.024226594343781473,
|
|
"signal/format_reward/centered_abs_mean": 0.14767795130610467,
|
|
"signal/format_reward/group_std_mean": 0.2351602092385292,
|
|
"signal/format_reward/group_zero_std_frac": 0.19444444738328456,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07383897565305234,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.07383897565305234,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.005967351235449314,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.007935958448797464,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00010681558633223176,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00010681558633223176,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.01820983216166496,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.035216915607452395,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00032595600350759923,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00032595600350759923,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.01820983216166496,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.035216915607452395,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00032595600350759923,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00032595600350759923,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.01820983216166496,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.035216915607452395,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00032595600350759923,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00032595600350759923,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.01820983216166496,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.035216915607452395,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00032595600350759923,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00032595600350759923,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.01820983216166496,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.035216915607452395,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00032595600350759923,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00032595600350759923,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.01820983216166496,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.035216915607452395,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00032595600350759923,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00032595600350759923,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.16836784183979034,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.1972884237766266,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.021045980229973792,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.021045980229973792,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.49082180069506387,
|
|
"calibration/batch_distribution_entropy": 0.3908793649822321,
|
|
"calibration/buffer_distribution_entropy": 0.2954284965643815,
|
|
"calibration/confidence_entropy": 0.312221649530802,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0356020942408377,
|
|
"calibration/coverage@30%": 0.07853403141361257,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4198799349990739,
|
|
"calibration/mean_confidence": 0.883742897278178,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009114583333333325,
|
|
"completions/max_length": 3640.0,
|
|
"completions/max_terminated_length": 3640.0,
|
|
"completions/mean_length": 430.2876708984375,
|
|
"completions/mean_terminated_length": 434.2104797363281,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 80.0,
|
|
"epoch": 0.04799940000749991,
|
|
"grad_norm": 0.0022384580224752426,
|
|
"learning_rate": 2.380952380952381e-06,
|
|
"loss": -0.0087,
|
|
"num_tokens": 33766956.0,
|
|
"reward": 0.8248475670814515,
|
|
"reward_std": 0.26103408336639405,
|
|
"rewards/accuracy_reward": 0.39869791865348814,
|
|
"rewards/brier_reward": 0.521475088596344,
|
|
"rewards/confidence_uniqueness_reward": 0.5816608428955078,
|
|
"rewards/format_reward": 0.9853298544883728,
|
|
"rewards/frontier_aurc_reward": -0.0063677155412733555,
|
|
"rewards/frontier_coverage_1": 0.009802600927650928,
|
|
"rewards/frontier_coverage_10": 0.009802600927650928,
|
|
"rewards/frontier_coverage_15": 0.009802600927650928,
|
|
"rewards/frontier_coverage_20": 0.009802600927650928,
|
|
"rewards/frontier_coverage_25": 0.009802600927650928,
|
|
"rewards/frontier_coverage_5": 0.009802600927650928,
|
|
"rewards/frontier_ece_reward": -0.04797694368753582,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.3026530027389526,
|
|
"signal/accuracy_reward/group_std_mean": 0.3679898679256439,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.0722222238779068,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1513265013694763,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1513265013694763,
|
|
"signal/advantage_abs_mean": 0.20957765579223633,
|
|
"signal/advantage_pre_scale_abs_mean": 0.20957765579223633,
|
|
"signal/advantage_pre_scale_std": 0.269910192489624,
|
|
"signal/advantage_std": 0.269910192489624,
|
|
"signal/brier_reward/centered_abs_mean": 0.26383275985717775,
|
|
"signal/brier_reward/group_std_mean": 0.31845086216926577,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03297909498214722,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03297909498214722,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.19037654995918274,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.22486165761947632,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023797068744897842,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023797068744897842,
|
|
"signal/format_reward/centered_abs_mean": 0.027153862826526164,
|
|
"signal/format_reward/group_std_mean": 0.061953308433294295,
|
|
"signal/format_reward/group_zero_std_frac": 0.7055555701255798,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013576931413263082,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013576931413263082,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00443207286298275,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.006194448843598366,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.9334105248563e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.9334105248563e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.026542419195175172,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.04689379408955574,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0004751092870719731,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0004751092870719731,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.026542419195175172,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.04689379408955574,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0004751092870719731,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0004751092870719731,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.026542419195175172,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.04689379408955574,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0004751092870719731,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0004751092870719731,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.026542419195175172,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.04689379408955574,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0004751092870719731,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0004751092870719731,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.026542419195175172,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.04689379408955574,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0004751092870719731,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0004751092870719731,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.026542419195175172,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.04689379408955574,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0004751092870719731,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0004751092870719731,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.1626291185617447,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.19648596048355102,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.020328639820218087,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.020328639820218087,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.39173247160966357,
|
|
"calibration/batch_distribution_entropy": 0.5750020929892202,
|
|
"calibration/buffer_distribution_entropy": 0.37760437579184847,
|
|
"calibration/confidence_entropy": 0.39650951222978464,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.06945169712793733,
|
|
"calibration/coverage@25%": 0.11382180156657964,
|
|
"calibration/coverage@30%": 0.20625,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.2721868572108504,
|
|
"calibration/mean_confidence": 0.8325929617455735,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010937499999999978,
|
|
"completions/max_length": 3833.0,
|
|
"completions/max_terminated_length": 3833.0,
|
|
"completions/mean_length": 463.96650390625,
|
|
"completions/mean_terminated_length": 469.0935791015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 100.4,
|
|
"epoch": 0.05999925000937488,
|
|
"grad_norm": 0.0008329463307745755,
|
|
"learning_rate": 2.9761904761904763e-06,
|
|
"loss": -0.0079,
|
|
"num_tokens": 42236298.0,
|
|
"reward": 0.9248562097549439,
|
|
"reward_std": 0.2358280152082443,
|
|
"rewards/accuracy_reward": 0.5243923544883728,
|
|
"rewards/brier_reward": 0.6481992244720459,
|
|
"rewards/confidence_uniqueness_reward": 0.6879818558692932,
|
|
"rewards/format_reward": 0.9878472208976745,
|
|
"rewards/frontier_aurc_reward": -0.004981133854016662,
|
|
"rewards/frontier_coverage_1": 0.002557537937536836,
|
|
"rewards/frontier_coverage_10": 0.002557537937536836,
|
|
"rewards/frontier_coverage_15": 0.002557537937536836,
|
|
"rewards/frontier_coverage_20": 0.002557537937536836,
|
|
"rewards/frontier_coverage_25": 0.002557537937536836,
|
|
"rewards/frontier_coverage_5": 0.002557537937536836,
|
|
"rewards/frontier_ece_reward": 0.012226011976599694,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.28567166328430177,
|
|
"signal/accuracy_reward/group_std_mean": 0.35009088516235354,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.11388889104127883,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14283583164215088,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14283583164215088,
|
|
"signal/advantage_abs_mean": 0.1873850554227829,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1873850554227829,
|
|
"signal/advantage_pre_scale_std": 0.2495607316493988,
|
|
"signal/advantage_std": 0.2495607316493988,
|
|
"signal/brier_reward/centered_abs_mean": 0.21691001057624817,
|
|
"signal/brier_reward/group_std_mean": 0.26915258169174194,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027113751322031022,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.027113751322031022,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.11467475891113281,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.14529187828302384,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014334344863891601,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014334344863891601,
|
|
"signal/format_reward/centered_abs_mean": 0.021853298880159855,
|
|
"signal/format_reward/group_std_mean": 0.0452168170362711,
|
|
"signal/format_reward/group_zero_std_frac": 0.8,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010926649440079927,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010926649440079927,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036150857340544462,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005566043313592672,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.471003143815324e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.471003143815324e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.040765144675970075,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.062347762286663055,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0007296960451640188,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0007296960451640188,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.040765144675970075,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.062347762286663055,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0007296960451640188,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007296960451640188,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.040765144675970075,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.062347762286663055,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007296960451640188,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007296960451640188,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.040765144675970075,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.062347762286663055,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007296960451640188,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007296960451640188,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.040765144675970075,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.062347762286663055,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007296960451640188,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007296960451640188,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.040765144675970075,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.062347762286663055,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0007296960451640188,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0007296960451640188,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.13129711151123047,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.16005820035934448,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.01641213893890381,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.01641213893890381,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3284406681871355,
|
|
"calibration/batch_distribution_entropy": 0.6747878884432902,
|
|
"calibration/buffer_distribution_entropy": 0.5584650318532487,
|
|
"calibration/confidence_entropy": 0.5556384101353083,
|
|
"calibration/coverage@0%": 0.0010471204188481676,
|
|
"calibration/coverage@1%": 0.0010471204188481676,
|
|
"calibration/coverage@10%": 0.0010471204188481676,
|
|
"calibration/coverage@15%": 0.015785200324775317,
|
|
"calibration/coverage@20%": 0.07140799084621567,
|
|
"calibration/coverage@25%": 0.1294562021482756,
|
|
"calibration/coverage@30%": 0.36101967648308186,
|
|
"calibration/coverage@5%": 0.0010471204188481676,
|
|
"calibration/ece": 0.12430206094720246,
|
|
"calibration/mean_confidence": 0.7177475003670872,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013802083333333326,
|
|
"completions/max_length": 3608.6,
|
|
"completions/max_terminated_length": 3608.6,
|
|
"completions/mean_length": 528.2802978515625,
|
|
"completions/mean_terminated_length": 535.6919372558593,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 127.2,
|
|
"epoch": 0.07199910001124986,
|
|
"grad_norm": 0.0006069006049074233,
|
|
"learning_rate": 3.5714285714285718e-06,
|
|
"loss": -0.0105,
|
|
"num_tokens": 51432007.0,
|
|
"reward": 0.9590584993362427,
|
|
"reward_std": 0.19429016709327698,
|
|
"rewards/accuracy_reward": 0.5807291746139527,
|
|
"rewards/brier_reward": 0.7223093867301941,
|
|
"rewards/confidence_uniqueness_reward": 0.6810937523841858,
|
|
"rewards/format_reward": 0.9850694298744201,
|
|
"rewards/frontier_aurc_reward": -0.0038667929824441672,
|
|
"rewards/frontier_coverage_1": -0.010892250412143766,
|
|
"rewards/frontier_coverage_10": -0.010892250412143766,
|
|
"rewards/frontier_coverage_15": -0.010892250412143766,
|
|
"rewards/frontier_coverage_20": -0.010892250412143766,
|
|
"rewards/frontier_coverage_25": -0.010892250412143766,
|
|
"rewards/frontier_coverage_5": -0.010892250412143766,
|
|
"rewards/frontier_ece_reward": 0.015782377682626247,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.23936631977558137,
|
|
"signal/accuracy_reward/group_std_mean": 0.30298495292663574,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.1833333343267441,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11968315988779069,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11968315988779069,
|
|
"signal/advantage_abs_mean": 0.14813488423824311,
|
|
"signal/advantage_pre_scale_abs_mean": 0.14813488423824311,
|
|
"signal/advantage_pre_scale_std": 0.21500767171382903,
|
|
"signal/advantage_std": 0.21500767171382903,
|
|
"signal/brier_reward/centered_abs_mean": 0.146245139837265,
|
|
"signal/brier_reward/group_std_mean": 0.18797107338905333,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018280642479658125,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018280642479658125,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.11807332634925842,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.15081189423799515,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014759165793657303,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014759165793657303,
|
|
"signal/format_reward/centered_abs_mean": 0.02557508684694767,
|
|
"signal/format_reward/group_std_mean": 0.04947390109300613,
|
|
"signal/format_reward/group_zero_std_frac": 0.7916666865348816,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012787543423473834,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012787543423473834,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022939105052500962,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0038691143039613963,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1060995863517745e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1060995863517745e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0672921821475029,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.08672473132610321,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001204529986716807,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001204529986716807,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0672921821475029,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.08672473132610321,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001204529986716807,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001204529986716807,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0672921821475029,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.08672473132610321,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001204529986716807,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001204529986716807,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0672921821475029,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08672473132610321,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001204529986716807,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001204529986716807,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0672921821475029,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08672473132610321,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001204529986716807,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001204529986716807,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0672921821475029,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.08672473132610321,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001204529986716807,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001204529986716807,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.058205033838748935,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07591410800814628,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007275629229843617,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007275629229843617,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26403452047660375,
|
|
"calibration/batch_distribution_entropy": 0.6640550979352706,
|
|
"calibration/buffer_distribution_entropy": 0.6891557885929268,
|
|
"calibration/confidence_entropy": 0.5629938859398411,
|
|
"calibration/coverage@0%": 0.0042235751477624536,
|
|
"calibration/coverage@1%": 0.0042235751477624536,
|
|
"calibration/coverage@10%": 0.02877949620039404,
|
|
"calibration/coverage@15%": 0.0875555012196266,
|
|
"calibration/coverage@20%": 0.10297304507927574,
|
|
"calibration/coverage@25%": 0.480761541889483,
|
|
"calibration/coverage@30%": 0.8077846479500892,
|
|
"calibration/coverage@5%": 0.0042235751477624536,
|
|
"calibration/ece": 0.0715633782015198,
|
|
"calibration/mean_confidence": 0.7171294939722299,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015017361111111094,
|
|
"completions/max_length": 3967.2,
|
|
"completions/max_terminated_length": 3967.2,
|
|
"completions/mean_length": 572.1895263671875,
|
|
"completions/mean_terminated_length": 580.9646606445312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 165.4,
|
|
"epoch": 0.08399895001312484,
|
|
"grad_norm": 0.00052351359045133,
|
|
"learning_rate": 4.166666666666667e-06,
|
|
"loss": -0.011,
|
|
"num_tokens": 61101070.0,
|
|
"reward": 0.9819595694541932,
|
|
"reward_std": 0.17260923683643342,
|
|
"rewards/accuracy_reward": 0.63125,
|
|
"rewards/brier_reward": 0.7531757354736328,
|
|
"rewards/confidence_uniqueness_reward": 0.6459343433380127,
|
|
"rewards/format_reward": 0.9839409828186035,
|
|
"rewards/frontier_aurc_reward": -0.003214681288227439,
|
|
"rewards/frontier_coverage_1": -0.018123492784798145,
|
|
"rewards/frontier_coverage_10": -0.018123492784798145,
|
|
"rewards/frontier_coverage_15": -0.018123492784798145,
|
|
"rewards/frontier_coverage_20": -0.018123492784798145,
|
|
"rewards/frontier_coverage_25": -0.018123492784798145,
|
|
"rewards/frontier_coverage_5": -0.018123492784798145,
|
|
"rewards/frontier_ece_reward": 0.011834413185715676,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.21002604067325592,
|
|
"signal/accuracy_reward/group_std_mean": 0.26851107478141784,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.272222226858139,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10501302033662796,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10501302033662796,
|
|
"signal/advantage_abs_mean": 0.13170475214719773,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13170475214719773,
|
|
"signal/advantage_pre_scale_std": 0.19973941445350646,
|
|
"signal/advantage_std": 0.19973941445350646,
|
|
"signal/brier_reward/centered_abs_mean": 0.1260453164577484,
|
|
"signal/brier_reward/group_std_mean": 0.16464770436286927,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01575566455721855,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01575566455721855,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1379134923219681,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.16885134875774382,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01723918654024601,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01723918654024601,
|
|
"signal/format_reward/centered_abs_mean": 0.02651367262005806,
|
|
"signal/format_reward/group_std_mean": 0.04611495956778526,
|
|
"signal/format_reward/group_zero_std_frac": 0.8222222208976746,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01325683631002903,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01325683631002903,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030739160254597666,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005279354751110077,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.5023095046635714e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.5023095046635714e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.06398859769105911,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.08186554163694382,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001145395915955305,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001145395915955305,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.06398859769105911,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.08186554163694382,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001145395915955305,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001145395915955305,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06398859769105911,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.08186554163694382,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001145395915955305,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001145395915955305,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06398859769105911,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08186554163694382,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001145395915955305,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001145395915955305,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06398859769105911,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08186554163694382,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001145395915955305,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001145395915955305,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.06398859769105911,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.08186554163694382,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001145395915955305,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001145395915955305,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.028756240755319594,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.038216957822442055,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035945300944149492,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035945300944149492,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2924937155019407,
|
|
"calibration/batch_distribution_entropy": 0.6771849554275204,
|
|
"calibration/buffer_distribution_entropy": 0.6937744894431359,
|
|
"calibration/confidence_entropy": 0.4959900350122277,
|
|
"calibration/coverage@0%": 0.0026329676433828634,
|
|
"calibration/coverage@1%": 0.0026329676433828634,
|
|
"calibration/coverage@10%": 0.03893218024180806,
|
|
"calibration/coverage@15%": 0.07830225898196555,
|
|
"calibration/coverage@20%": 0.17442651665930492,
|
|
"calibration/coverage@25%": 0.344083234092524,
|
|
"calibration/coverage@30%": 0.4509267354187839,
|
|
"calibration/coverage@5%": 0.0026329676433828634,
|
|
"calibration/ece": 0.1226596169961736,
|
|
"calibration/mean_confidence": 0.7687242431311752,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014236111111111093,
|
|
"completions/max_length": 3888.2,
|
|
"completions/max_terminated_length": 3888.2,
|
|
"completions/mean_length": 601.69931640625,
|
|
"completions/mean_terminated_length": 610.4102172851562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 175.0,
|
|
"epoch": 0.09599880001499982,
|
|
"grad_norm": 0.0005529921618290246,
|
|
"learning_rate": 4.761904761904762e-06,
|
|
"loss": -0.0114,
|
|
"num_tokens": 71152166.0,
|
|
"reward": 0.9981375932693481,
|
|
"reward_std": 0.1639564424753189,
|
|
"rewards/accuracy_reward": 0.6465277910232544,
|
|
"rewards/brier_reward": 0.7522081136703491,
|
|
"rewards/confidence_uniqueness_reward": 0.7125440120697022,
|
|
"rewards/format_reward": 0.9847222208976746,
|
|
"rewards/frontier_aurc_reward": -0.005940702743828297,
|
|
"rewards/frontier_coverage_1": -0.01573992893099785,
|
|
"rewards/frontier_coverage_10": -0.01573992893099785,
|
|
"rewards/frontier_coverage_15": -0.01573992893099785,
|
|
"rewards/frontier_coverage_20": -0.014948921743780375,
|
|
"rewards/frontier_coverage_25": -0.012497423123568297,
|
|
"rewards/frontier_coverage_5": -0.01573992893099785,
|
|
"rewards/frontier_ece_reward": 0.009145193360745906,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1869140625,
|
|
"signal/accuracy_reward/group_std_mean": 0.2490226775407791,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2916666746139526,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09345703125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09345703125,
|
|
"signal/advantage_abs_mean": 0.11943778544664382,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11943778544664382,
|
|
"signal/advantage_pre_scale_std": 0.19496967792510986,
|
|
"signal/advantage_std": 0.19496967792510986,
|
|
"signal/brier_reward/centered_abs_mean": 0.13451988697052003,
|
|
"signal/brier_reward/group_std_mean": 0.17633683681488038,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016814985871315004,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016814985871315004,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.11400759369134902,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.14184125363826752,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014250949211418628,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014250949211418628,
|
|
"signal/format_reward/centered_abs_mean": 0.02620442695915699,
|
|
"signal/format_reward/group_std_mean": 0.04804914817214012,
|
|
"signal/format_reward/group_zero_std_frac": 0.8083333611488343,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013102213479578495,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013102213479578495,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.008588980231434108,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.014885761030018329,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00015374274080386384,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00015374274080386384,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.05871818587183952,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.07801110148429871,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0010510555002838373,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0010510555002838373,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.05871818587183952,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.07801110148429871,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0010510555002838373,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010510555002838373,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.05871818587183952,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.07801110148429871,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010510555002838373,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010510555002838373,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05651564598083496,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07526759058237076,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010116300079971551,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010116300079971551,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04918262958526611,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06600879728794098,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008803689968772232,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008803689968772232,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.05871818587183952,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.07801110148429871,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0010510555002838373,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0010510555002838373,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.017127741128206253,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02196224555373192,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021409676410257816,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021409676410257816,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21364178122642002,
|
|
"calibration/batch_distribution_entropy": 0.6522730345221792,
|
|
"calibration/buffer_distribution_entropy": 0.6968874334968922,
|
|
"calibration/confidence_entropy": 0.4419758190916566,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.06578919131585535,
|
|
"calibration/coverage@15%": 0.10395714068225181,
|
|
"calibration/coverage@20%": 0.5104134577073929,
|
|
"calibration/coverage@25%": 0.7441382057354524,
|
|
"calibration/coverage@30%": 0.963322553686276,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.09702895701947509,
|
|
"calibration/mean_confidence": 0.788752123718866,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01449652777777779,
|
|
"completions/max_length": 3590.6,
|
|
"completions/max_terminated_length": 3590.6,
|
|
"completions/mean_length": 638.76650390625,
|
|
"completions/mean_terminated_length": 648.3088623046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 185.0,
|
|
"epoch": 0.1079986500168748,
|
|
"grad_norm": 0.0009035434923134744,
|
|
"learning_rate": 4.909638554216868e-06,
|
|
"loss": -0.0116,
|
|
"num_tokens": 81646020.0,
|
|
"reward": 1.009622061252594,
|
|
"reward_std": 0.16088135838508605,
|
|
"rewards/accuracy_reward": 0.6539930462837219,
|
|
"rewards/brier_reward": 0.7586196541786194,
|
|
"rewards/confidence_uniqueness_reward": 0.7592846035957337,
|
|
"rewards/format_reward": 0.9852430701255799,
|
|
"rewards/frontier_aurc_reward": -0.007242204900830984,
|
|
"rewards/frontier_coverage_1": -0.006770293042063713,
|
|
"rewards/frontier_coverage_10": -0.006770293042063713,
|
|
"rewards/frontier_coverage_15": -0.00660779308527708,
|
|
"rewards/frontier_coverage_20": -0.0016520024975761772,
|
|
"rewards/frontier_coverage_25": 0.002179227757733315,
|
|
"rewards/frontier_coverage_5": -0.006770293042063713,
|
|
"rewards/frontier_ece_reward": 0.006944606266915798,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18404948115348815,
|
|
"signal/accuracy_reward/group_std_mean": 0.24551969170570373,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2944444537162781,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09202474057674408,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09202474057674408,
|
|
"signal/advantage_abs_mean": 0.11716565787792206,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11716565787792206,
|
|
"signal/advantage_pre_scale_std": 0.19196827709674835,
|
|
"signal/advantage_std": 0.19196827709674835,
|
|
"signal/brier_reward/centered_abs_mean": 0.14656473994255065,
|
|
"signal/brier_reward/group_std_mean": 0.1906241148710251,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01832059249281883,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01832059249281883,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08014513701200485,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10737452805042266,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010018142126500606,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010018142126500606,
|
|
"signal/format_reward/centered_abs_mean": 0.024522569589316846,
|
|
"signal/format_reward/group_std_mean": 0.044725016504526136,
|
|
"signal/format_reward/group_zero_std_frac": 0.8194444417953491,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012261284794658423,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012261284794658423,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.01112304050475359,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.019053217209875583,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00019910241826437414,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00019910241826437414,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.062235020101070404,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.08576205521821975,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0011140068061649799,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0011140068061649799,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.062235020101070404,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.08576205521821975,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0011140068061649799,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011140068061649799,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06180379539728165,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.08518806099891663,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001106287888251245,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001106287888251245,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.04614866077899933,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06483671665191651,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008260609698481858,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008260609698481858,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.031763285398483276,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.04507193565368652,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005685627809725701,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005685627809725701,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.062235020101070404,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.08576205521821975,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0011140068061649799,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0011140068061649799,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.011737137474119664,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.015026122331619263,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001467142184264958,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001467142184264958,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.38193449229887216,
|
|
"calibration/batch_distribution_entropy": 0.6438885672841508,
|
|
"calibration/buffer_distribution_entropy": 0.695458344727496,
|
|
"calibration/confidence_entropy": 0.47561906765262674,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.032827148711615114,
|
|
"calibration/coverage@25%": 0.11388592600909449,
|
|
"calibration/coverage@30%": 0.27125548429792523,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.23039143132055712,
|
|
"calibration/mean_confidence": 0.7539081380507843,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011111111111111094,
|
|
"completions/max_length": 3867.6,
|
|
"completions/max_terminated_length": 3867.6,
|
|
"completions/mean_length": 667.6127685546875,
|
|
"completions/mean_terminated_length": 675.1084838867188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 198.4,
|
|
"epoch": 0.11999850001874976,
|
|
"grad_norm": 0.0005543790175579488,
|
|
"learning_rate": 4.759036144578314e-06,
|
|
"loss": -0.0081,
|
|
"num_tokens": 92434519.0,
|
|
"reward": 1.0035521984100342,
|
|
"reward_std": 0.15531424283981324,
|
|
"rewards/accuracy_reward": 0.6418402791023254,
|
|
"rewards/brier_reward": 0.7497736096382142,
|
|
"rewards/confidence_uniqueness_reward": 0.7558701634407043,
|
|
"rewards/format_reward": 0.9885416626930237,
|
|
"rewards/frontier_aurc_reward": -0.006629853136837482,
|
|
"rewards/frontier_coverage_1": -0.007525159860961139,
|
|
"rewards/frontier_coverage_10": -0.007525159860961139,
|
|
"rewards/frontier_coverage_15": -0.007525159860961139,
|
|
"rewards/frontier_coverage_20": -0.0047117485897615555,
|
|
"rewards/frontier_coverage_25": 0.0007355780689977109,
|
|
"rewards/frontier_coverage_5": -0.007525159860961139,
|
|
"rewards/frontier_ece_reward": 0.007075064536184073,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18183593451976776,
|
|
"signal/accuracy_reward/group_std_mean": 0.23834896683692933,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3333333432674408,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09091796725988388,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09091796725988388,
|
|
"signal/advantage_abs_mean": 0.11554279774427414,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11554279774427414,
|
|
"signal/advantage_pre_scale_std": 0.18593416810035707,
|
|
"signal/advantage_std": 0.18593416810035707,
|
|
"signal/brier_reward/centered_abs_mean": 0.15182736814022063,
|
|
"signal/brier_reward/group_std_mean": 0.1949179947376251,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01897842101752758,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01897842101752758,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07905573099851608,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1051044762134552,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00988196637481451,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00988196637481451,
|
|
"signal/format_reward/centered_abs_mean": 0.0185546875,
|
|
"signal/format_reward/group_std_mean": 0.03553221933543682,
|
|
"signal/format_reward/group_zero_std_frac": 0.8472222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00927734375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00927734375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00905402349308133,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.014961976557970047,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00016206701402552426,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00016206701402552426,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.07501375824213027,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.10256523936986923,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013427462195977568,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013427462195977568,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.07501375824213027,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.10256523936986923,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013427462195977568,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013427462195977568,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07501375824213027,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10256523936986923,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013427462195977568,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013427462195977568,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06658464595675469,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09160058945417404,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011918651405721902,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011918651405721902,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.048682621121406554,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0679876148700714,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008714188821613789,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008714188821613789,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.07501375824213027,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.10256523936986923,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013427462195977568,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013427462195977568,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.013041250593960284,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.016401969455182554,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016301563242450356,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016301563242450356,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.11999850001874976,
|
|
"eval_calibration/aurc": 0.2188544017493154,
|
|
"eval_calibration/batch_distribution_entropy": 0.6349679723255258,
|
|
"eval_calibration/buffer_distribution_entropy": 0.6764331779830802,
|
|
"eval_calibration/confidence_entropy": 0.4723842949949193,
|
|
"eval_calibration/coverage@0%": 0.09375,
|
|
"eval_calibration/coverage@1%": 0.09375,
|
|
"eval_calibration/coverage@10%": 0.18229166666666666,
|
|
"eval_calibration/coverage@15%": 0.3229166666666667,
|
|
"eval_calibration/coverage@20%": 0.4270833333333333,
|
|
"eval_calibration/coverage@25%": 0.6510416666666666,
|
|
"eval_calibration/coverage@30%": 0.9270833333333334,
|
|
"eval_calibration/coverage@5%": 0.09375,
|
|
"eval_calibration/ece": 0.1459895833333333,
|
|
"eval_calibration/mean_confidence": 0.75015625,
|
|
"eval_completions/clipped_ratio": 0.008680555555555544,
|
|
"eval_completions/max_length": 2062.8333333333335,
|
|
"eval_completions/max_terminated_length": 2062.8333333333335,
|
|
"eval_completions/mean_length": 663.4890848795573,
|
|
"eval_completions/mean_terminated_length": 669.3323974609375,
|
|
"eval_completions/min_length": 89.16666666666667,
|
|
"eval_completions/min_terminated_length": 255.66666666666666,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 92434519.0,
|
|
"eval_reward": 0.9972037474314371,
|
|
"eval_reward_std": 0.28004638353983563,
|
|
"eval_rewards/accuracy_reward": 0.6336805522441864,
|
|
"eval_rewards/brier_reward": 0.7541488905747732,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.7219984630743662,
|
|
"eval_rewards/format_reward": 0.9904513855775198,
|
|
"eval_rewards/frontier_aurc_reward": -0.005396095337346196,
|
|
"eval_rewards/frontier_coverage_1": -0.0035735241253860295,
|
|
"eval_rewards/frontier_coverage_10": -0.0035735241253860295,
|
|
"eval_rewards/frontier_coverage_15": -0.0035735241253860295,
|
|
"eval_rewards/frontier_coverage_20": -0.0029388018786751977,
|
|
"eval_rewards/frontier_coverage_25": 0.0015783853693089138,
|
|
"eval_rewards/frontier_coverage_5": -0.0035735241253860295,
|
|
"eval_rewards/frontier_ece_reward": 0.00796901163024207,
|
|
"eval_runtime": 192.1708,
|
|
"eval_samples_per_second": 5.204,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4539930572112401,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4836362848679225,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22699652860562006,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22699652860562006,
|
|
"eval_signal/advantage_abs_mean": 0.25468970090150833,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.25468970090150833,
|
|
"eval_signal/advantage_pre_scale_std": 0.27723759412765503,
|
|
"eval_signal/advantage_std": 0.27723759412765503,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.22432004163662592,
|
|
"eval_signal/brier_reward/group_std_mean": 0.28245019912719727,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02804000520457824,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02804000520457824,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.10615977024038632,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.13361614073316255,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01326997128004829,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01326997128004829,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.018283419776707888,
|
|
"eval_signal/format_reward/group_std_mean": 0.04803628381341696,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.750000019868215,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009141709888353944,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.009141709888353944,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.008657781640067697,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.01709814602509141,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00015497428588181114,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00015497428588181114,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.09066158533096313,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.1351415937145551,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016228424113554258,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016228424113554258,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.09066158533096313,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.1351415937145551,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016228424113554258,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016228424113554258,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.09066158533096313,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.1351415937145551,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016228424113554258,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016228424113554258,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.0809883214533329,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.12148692086338997,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001449690879477809,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001449690879477809,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.05787591636180878,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.08920721213022868,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010359788914987196,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010359788914987196,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.09066158533096313,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.1351415937145551,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016228424113554258,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016228424113554258,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.01885589553664128,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.023027233468989532,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00235698694208016,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00235698694208016,
|
|
"eval_steps_per_second": 0.031,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26727014999321663,
|
|
"calibration/batch_distribution_entropy": 0.6491827852211476,
|
|
"calibration/buffer_distribution_entropy": 0.6668052837922903,
|
|
"calibration/confidence_entropy": 0.45001724253185077,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.08429319371727748,
|
|
"calibration/coverage@15%": 0.26649214659685866,
|
|
"calibration/coverage@20%": 0.40556231460090497,
|
|
"calibration/coverage@25%": 0.5127937336814622,
|
|
"calibration/coverage@30%": 0.574368543535021,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.14787168232023692,
|
|
"calibration/mean_confidence": 0.7670484147495956,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011545138888888884,
|
|
"completions/max_length": 3615.6,
|
|
"completions/max_terminated_length": 3615.6,
|
|
"completions/mean_length": 701.3361206054688,
|
|
"completions/mean_terminated_length": 709.5888671875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 191.4,
|
|
"epoch": 0.13199835002062474,
|
|
"grad_norm": 0.000779949186835438,
|
|
"learning_rate": 4.60843373493976e-06,
|
|
"loss": -0.0093,
|
|
"num_tokens": 103594487.0,
|
|
"reward": 1.0132197499275208,
|
|
"reward_std": 0.15483529269695281,
|
|
"rewards/accuracy_reward": 0.6579861164093017,
|
|
"rewards/brier_reward": 0.7579427838325501,
|
|
"rewards/confidence_uniqueness_reward": 0.7643311500549317,
|
|
"rewards/format_reward": 0.98828125,
|
|
"rewards/frontier_aurc_reward": -0.004797754716128111,
|
|
"rewards/frontier_coverage_1": -0.014039672841317952,
|
|
"rewards/frontier_coverage_10": -0.014039672841317952,
|
|
"rewards/frontier_coverage_15": -0.014039672841317952,
|
|
"rewards/frontier_coverage_20": -0.009265875071287155,
|
|
"rewards/frontier_coverage_25": 0.0018376953317783772,
|
|
"rewards/frontier_coverage_5": -0.014039672841317952,
|
|
"rewards/frontier_ece_reward": 0.008207211550325156,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18003472089767455,
|
|
"signal/accuracy_reward/group_std_mean": 0.23699139356613158,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3305555582046509,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09001736044883728,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09001736044883728,
|
|
"signal/advantage_abs_mean": 0.1147344321012497,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1147344321012497,
|
|
"signal/advantage_pre_scale_std": 0.1863584667444229,
|
|
"signal/advantage_std": 0.1863584667444229,
|
|
"signal/brier_reward/centered_abs_mean": 0.15073903203010558,
|
|
"signal/brier_reward/group_std_mean": 0.19318909347057342,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018842379003763197,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018842379003763197,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08415258079767227,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.11098144203424454,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010519072599709033,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010519072599709033,
|
|
"signal/format_reward/centered_abs_mean": 0.01915690079331398,
|
|
"signal/format_reward/group_std_mean": 0.03609803505241871,
|
|
"signal/format_reward/group_zero_std_frac": 0.85,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00957845039665699,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00957845039665699,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.006547454837709665,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.010919546522200108,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00011719943722710013,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00011719943722710013,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.07958068549633027,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1058678761124611,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014244942227378487,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014244942227378487,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.07958068549633027,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1058678761124611,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014244942227378487,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014244942227378487,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07958068549633027,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1058678761124611,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014244942227378487,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014244942227378487,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07063625603914261,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09437974393367768,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012643889989703895,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012643889989703895,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05389633476734161,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07225526571273803,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009647443424910307,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009647443424910307,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.07958068549633027,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1058678761124611,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014244942227378487,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014244942227378487,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014524108730256557,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01814715452492237,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018155135912820697,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018155135912820697,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31221768290134777,
|
|
"calibration/batch_distribution_entropy": 0.6625914790645601,
|
|
"calibration/buffer_distribution_entropy": 0.6597902779721573,
|
|
"calibration/confidence_entropy": 0.40553370995317506,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.09553805774278215,
|
|
"calibration/coverage@15%": 0.2251968503937008,
|
|
"calibration/coverage@20%": 0.31968503937007875,
|
|
"calibration/coverage@25%": 0.44452919947506564,
|
|
"calibration/coverage@30%": 0.5899460078534031,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.20356478247024595,
|
|
"calibration/mean_confidence": 0.792109658834686,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009375,
|
|
"completions/max_length": 3821.0,
|
|
"completions/max_terminated_length": 3821.0,
|
|
"completions/mean_length": 738.2202392578125,
|
|
"completions/mean_terminated_length": 745.2370727539062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 237.0,
|
|
"epoch": 0.14399820002249972,
|
|
"grad_norm": 0.0004931938019581139,
|
|
"learning_rate": 4.457831325301205e-06,
|
|
"loss": -0.0091,
|
|
"num_tokens": 115195360.0,
|
|
"reward": 1.0030123233795165,
|
|
"reward_std": 0.15792331099510193,
|
|
"rewards/accuracy_reward": 0.62734375,
|
|
"rewards/brier_reward": 0.744609785079956,
|
|
"rewards/confidence_uniqueness_reward": 0.7936466217041016,
|
|
"rewards/format_reward": 0.9903645873069763,
|
|
"rewards/frontier_aurc_reward": -0.00615367041900754,
|
|
"rewards/frontier_coverage_1": 0.0065983245614916084,
|
|
"rewards/frontier_coverage_10": 0.0065983245614916084,
|
|
"rewards/frontier_coverage_15": 0.0065983245614916084,
|
|
"rewards/frontier_coverage_20": 0.007367745554074645,
|
|
"rewards/frontier_coverage_25": 0.01494669746607542,
|
|
"rewards/frontier_coverage_5": 0.0065983245614916084,
|
|
"rewards/frontier_ece_reward": 0.008915235195308924,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18585611879825592,
|
|
"signal/accuracy_reward/group_std_mean": 0.24681904017925263,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.29444445073604586,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09292805939912796,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09292805939912796,
|
|
"signal/advantage_abs_mean": 0.11603728979825974,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11603728979825974,
|
|
"signal/advantage_pre_scale_std": 0.18821111917495728,
|
|
"signal/advantage_std": 0.18821111917495728,
|
|
"signal/brier_reward/centered_abs_mean": 0.1572576254606247,
|
|
"signal/brier_reward/group_std_mean": 0.20556944012641906,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019657203182578086,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019657203182578086,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08561482876539231,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.11170679777860641,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010701853595674039,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010701853595674039,
|
|
"signal/format_reward/centered_abs_mean": 0.01700846366584301,
|
|
"signal/format_reward/group_std_mean": 0.03309671171009541,
|
|
"signal/format_reward/group_zero_std_frac": 0.8638888835906983,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008504231832921505,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008504231832921505,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.007575357984751463,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.012241183035075664,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00013559890358010306,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00013559890358010306,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.074223855137825,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.10750024914741516,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013286069501191379,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013286069501191379,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.074223855137825,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.10750024914741516,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013286069501191379,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013286069501191379,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.074223855137825,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10750024914741516,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013286069501191379,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013286069501191379,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06982820257544517,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10162868052721023,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012499248143285513,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012499248143285513,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.047670333087444304,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06957853436470032,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008532989420928061,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008532989420928061,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.074223855137825,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.10750024914741516,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013286069501191379,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013286069501191379,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.016095476038753987,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.020388123765587806,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020119345048442484,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020119345048442484,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2525696678555546,
|
|
"calibration/batch_distribution_entropy": 0.7143370845868392,
|
|
"calibration/buffer_distribution_entropy": 0.6817250857748087,
|
|
"calibration/confidence_entropy": 0.4144873480218685,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.04682893809832985,
|
|
"calibration/coverage@15%": 0.4208647940840097,
|
|
"calibration/coverage@20%": 0.5376734252296256,
|
|
"calibration/coverage@25%": 0.6471056085142334,
|
|
"calibration/coverage@30%": 0.7150395778364116,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.15485868166194255,
|
|
"calibration/mean_confidence": 0.7806578303969407,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.007291666666666674,
|
|
"completions/max_length": 3545.2,
|
|
"completions/max_terminated_length": 3545.2,
|
|
"completions/mean_length": 729.8021728515625,
|
|
"completions/mean_terminated_length": 735.1742919921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 213.4,
|
|
"epoch": 0.1559980500243747,
|
|
"grad_norm": 0.0005639444570988417,
|
|
"learning_rate": 4.307228915662651e-06,
|
|
"loss": -0.0053,
|
|
"num_tokens": 126696729.0,
|
|
"reward": 1.024180245399475,
|
|
"reward_std": 0.14699944406747817,
|
|
"rewards/accuracy_reward": 0.6559027791023254,
|
|
"rewards/brier_reward": 0.7658300518989563,
|
|
"rewards/confidence_uniqueness_reward": 0.8237267851829528,
|
|
"rewards/format_reward": 0.9926215410232544,
|
|
"rewards/frontier_aurc_reward": -0.003957700170576573,
|
|
"rewards/frontier_coverage_1": -0.000882378313690424,
|
|
"rewards/frontier_coverage_10": -0.000882378313690424,
|
|
"rewards/frontier_coverage_15": -0.00038324356428347527,
|
|
"rewards/frontier_coverage_20": 0.0018486734246835113,
|
|
"rewards/frontier_coverage_25": 0.010639767814427614,
|
|
"rewards/frontier_coverage_5": -0.000882378313690424,
|
|
"rewards/frontier_ece_reward": 0.00900000799447298,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1783745676279068,
|
|
"signal/accuracy_reward/group_std_mean": 0.23473148345947265,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3388888895511627,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0891872838139534,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0891872838139534,
|
|
"signal/advantage_abs_mean": 0.10917116552591324,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10917116552591324,
|
|
"signal/advantage_pre_scale_std": 0.17814627587795256,
|
|
"signal/advantage_std": 0.17814627587795256,
|
|
"signal/brier_reward/centered_abs_mean": 0.1465170204639435,
|
|
"signal/brier_reward/group_std_mean": 0.18987120389938356,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018314627557992937,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018314627557992937,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08566285967826844,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10883112400770187,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010707857459783554,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010707857459783554,
|
|
"signal/format_reward/centered_abs_mean": 0.01331922747194767,
|
|
"signal/format_reward/group_std_mean": 0.028269005939364433,
|
|
"signal/format_reward/group_zero_std_frac": 0.8722222208976745,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006659613735973835,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.006659613735973835,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.004681824566796422,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0076571997255086895,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.3804658788722e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.3804658788722e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.07828285247087478,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.11360109597444534,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014012629631906747,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014012629631906747,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.07828285247087478,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.11360109597444534,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014012629631906747,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014012629631906747,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07654114663600922,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11135455518960953,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00137008645106107,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00137008645106107,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06982675939798355,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10253714323043824,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001249898923560977,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001249898923560977,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04172022417187691,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06313150227069855,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007467919844202697,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007467919844202697,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.07828285247087478,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.11360109597444534,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014012629631906747,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014012629631906747,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.015052585303783417,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01943938247859478,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018815731629729271,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018815731629729271,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3126631474688041,
|
|
"calibration/batch_distribution_entropy": 0.7277950147979884,
|
|
"calibration/buffer_distribution_entropy": 0.7063283269975165,
|
|
"calibration/confidence_entropy": 0.4591715826280735,
|
|
"calibration/coverage@0%": 0.008421052631578947,
|
|
"calibration/coverage@1%": 0.008421052631578947,
|
|
"calibration/coverage@10%": 0.08175629290617849,
|
|
"calibration/coverage@15%": 0.12386155606407323,
|
|
"calibration/coverage@20%": 0.1254405034324943,
|
|
"calibration/coverage@25%": 0.2649911853756027,
|
|
"calibration/coverage@30%": 0.39158820636490194,
|
|
"calibration/coverage@5%": 0.05947368421052631,
|
|
"calibration/ece": 0.16750350495179606,
|
|
"calibration/mean_confidence": 0.7642698401266471,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009982638888888862,
|
|
"completions/max_length": 3730.4,
|
|
"completions/max_terminated_length": 3730.4,
|
|
"completions/mean_length": 726.0807373046875,
|
|
"completions/mean_terminated_length": 733.3897216796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 192.6,
|
|
"epoch": 0.16799790002624967,
|
|
"grad_norm": 0.000522218644618988,
|
|
"learning_rate": 4.156626506024097e-06,
|
|
"loss": -0.009,
|
|
"num_tokens": 138139323.0,
|
|
"reward": 1.0156872868537903,
|
|
"reward_std": 0.14232541620731354,
|
|
"rewards/accuracy_reward": 0.634375,
|
|
"rewards/brier_reward": 0.7619485020637512,
|
|
"rewards/confidence_uniqueness_reward": 0.8558455348014832,
|
|
"rewards/format_reward": 0.9899305582046509,
|
|
"rewards/frontier_aurc_reward": -0.003244505263864994,
|
|
"rewards/frontier_coverage_1": 0.0021130547567736356,
|
|
"rewards/frontier_coverage_10": 0.0021130547567736356,
|
|
"rewards/frontier_coverage_15": 0.0022873246343806386,
|
|
"rewards/frontier_coverage_20": 0.0036932858638465405,
|
|
"rewards/frontier_coverage_25": 0.009348882955964654,
|
|
"rewards/frontier_coverage_5": 0.0021130547567736356,
|
|
"rewards/frontier_ece_reward": 0.007843670062720775,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1700954854488373,
|
|
"signal/accuracy_reward/group_std_mean": 0.22446969747543336,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3583333373069763,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08504774272441865,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08504774272441865,
|
|
"signal/advantage_abs_mean": 0.10507383644580841,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10507383644580841,
|
|
"signal/advantage_pre_scale_std": 0.17605942785739898,
|
|
"signal/advantage_std": 0.17605942785739898,
|
|
"signal/brier_reward/centered_abs_mean": 0.13834567368030548,
|
|
"signal/brier_reward/group_std_mean": 0.17988546192646027,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017293209210038185,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017293209210038185,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07612362504005432,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10020371675491332,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00951545313000679,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00951545313000679,
|
|
"signal/format_reward/centered_abs_mean": 0.017078992538154127,
|
|
"signal/format_reward/group_std_mean": 0.0330724623054266,
|
|
"signal/format_reward/group_zero_std_frac": 0.8611111044883728,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008539496269077063,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008539496269077063,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0034367543645203113,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0058014895766973495,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.151790075819008e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.151790075819008e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.08826040178537368,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.12145550101995468,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015798611333593727,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015798611333593727,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.08826040178537368,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.12145550101995468,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015798611333593727,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015798611333593727,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08678570687770844,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11957939118146896,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015534641221165656,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015534641221165656,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07486912310123443,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10399644821882248,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001340157282538712,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001340157282538712,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04686204046010971,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06673805713653565,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008388305082917214,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008388305082917214,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.08826040178537368,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.12145550101995468,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015798611333593727,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015798611333593727,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.013867172226309777,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01827750392258167,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017333965282887221,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017333965282887221,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22829603474016733,
|
|
"calibration/batch_distribution_entropy": 0.7491460889045519,
|
|
"calibration/buffer_distribution_entropy": 0.738965155571171,
|
|
"calibration/confidence_entropy": 0.49153403613387575,
|
|
"calibration/coverage@0%": 0.020480859458860516,
|
|
"calibration/coverage@1%": 0.020480859458860516,
|
|
"calibration/coverage@10%": 0.14642396025942012,
|
|
"calibration/coverage@15%": 0.3115578189599651,
|
|
"calibration/coverage@20%": 0.46366534740545295,
|
|
"calibration/coverage@25%": 0.6064039138082674,
|
|
"calibration/coverage@30%": 0.6646767810026385,
|
|
"calibration/coverage@5%": 0.052387027437863144,
|
|
"calibration/ece": 0.12678730760341095,
|
|
"calibration/mean_confidence": 0.7407062089742026,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.006770833333333326,
|
|
"completions/max_length": 3199.8,
|
|
"completions/max_terminated_length": 3199.8,
|
|
"completions/mean_length": 771.2448852539062,
|
|
"completions/mean_terminated_length": 776.47412109375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 252.8,
|
|
"epoch": 0.17999775002812465,
|
|
"grad_norm": 0.0006210155552253127,
|
|
"learning_rate": 4.006024096385543e-06,
|
|
"loss": -0.0039,
|
|
"num_tokens": 150088960.0,
|
|
"reward": 1.0522391557693482,
|
|
"reward_std": 0.13885852843523025,
|
|
"rewards/accuracy_reward": 0.6905381917953491,
|
|
"rewards/brier_reward": 0.7954351902008057,
|
|
"rewards/confidence_uniqueness_reward": 0.8835989475250244,
|
|
"rewards/format_reward": 0.9932291626930236,
|
|
"rewards/frontier_aurc_reward": -0.002661742176860571,
|
|
"rewards/frontier_coverage_1": -0.009026618162170052,
|
|
"rewards/frontier_coverage_10": -0.00876101772300899,
|
|
"rewards/frontier_coverage_15": -0.0059734506183303894,
|
|
"rewards/frontier_coverage_20": 0.00023110741749405862,
|
|
"rewards/frontier_coverage_25": 0.014540878124535084,
|
|
"rewards/frontier_coverage_5": -0.009026618162170052,
|
|
"rewards/frontier_ece_reward": 0.006770659517496824,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17407226264476777,
|
|
"signal/accuracy_reward/group_std_mean": 0.22825241684913636,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.36111111640930177,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08703613132238389,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08703613132238389,
|
|
"signal/advantage_abs_mean": 0.10250708907842636,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10250708907842636,
|
|
"signal/advantage_pre_scale_std": 0.17143681049346923,
|
|
"signal/advantage_std": 0.17143681049346923,
|
|
"signal/brier_reward/centered_abs_mean": 0.12454370558261871,
|
|
"signal/brier_reward/group_std_mean": 0.16498080193996428,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015567963197827338,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015567963197827338,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0648738332092762,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0866569384932518,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008109229151159525,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008109229151159525,
|
|
"signal/format_reward/centered_abs_mean": 0.01250000037252903,
|
|
"signal/format_reward/group_std_mean": 0.02707981951534748,
|
|
"signal/format_reward/group_zero_std_frac": 0.8777777791023255,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006250000186264515,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.006250000186264515,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003043852746486664,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005191993620246649,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.448495867312886e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.448495867312886e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.08956246227025985,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.12367427349090576,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016031680861487985,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016031680861487985,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.08892591893672944,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1228408694267273,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015917738899588584,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015917738899588584,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07821435481309891,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10914516896009445,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014000368304550649,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014000368304550649,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06118913814425468,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08704253137111664,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010952855343930423,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010952855343930423,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04755426123738289,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0676010601222515,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008512212429195642,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008512212429195642,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.08956246227025985,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.12367427349090576,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016031680861487985,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016031680861487985,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010565318539738655,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.014126934669911861,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001320664817467332,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001320664817467332,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2413408381036839,
|
|
"calibration/batch_distribution_entropy": 0.7529266293718184,
|
|
"calibration/buffer_distribution_entropy": 0.7566801887277335,
|
|
"calibration/confidence_entropy": 0.4488029392924554,
|
|
"calibration/coverage@0%": 0.0015625,
|
|
"calibration/coverage@1%": 0.0015625,
|
|
"calibration/coverage@10%": 0.052083333333333336,
|
|
"calibration/coverage@15%": 0.39773123909249564,
|
|
"calibration/coverage@20%": 0.5354166666666667,
|
|
"calibration/coverage@25%": 0.5969049173194082,
|
|
"calibration/coverage@30%": 0.8075363264842774,
|
|
"calibration/coverage@5%": 0.0015625,
|
|
"calibration/ece": 0.1542229314047629,
|
|
"calibration/mean_confidence": 0.7623944513982001,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009895833333333348,
|
|
"completions/max_length": 3758.0,
|
|
"completions/max_terminated_length": 3758.0,
|
|
"completions/mean_length": 846.7328125,
|
|
"completions/mean_terminated_length": 855.2931884765625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 260.2,
|
|
"epoch": 0.19199760002999963,
|
|
"grad_norm": 0.0005486936424858868,
|
|
"learning_rate": 3.855421686746989e-06,
|
|
"loss": -0.0076,
|
|
"num_tokens": 162896602.0,
|
|
"reward": 1.035358762741089,
|
|
"reward_std": 0.14456366002559662,
|
|
"rewards/accuracy_reward": 0.663281238079071,
|
|
"rewards/brier_reward": 0.7700084805488586,
|
|
"rewards/confidence_uniqueness_reward": 0.8956380486488342,
|
|
"rewards/format_reward": 0.9899305462837219,
|
|
"rewards/frontier_aurc_reward": -0.004593700263649225,
|
|
"rewards/frontier_coverage_1": -0.0053374451585114,
|
|
"rewards/frontier_coverage_10": -0.005398740433156491,
|
|
"rewards/frontier_coverage_15": -0.003581512067466974,
|
|
"rewards/frontier_coverage_20": 0.0033742699888534844,
|
|
"rewards/frontier_coverage_25": 0.01686990410089493,
|
|
"rewards/frontier_coverage_5": -0.0053374451585114,
|
|
"rewards/frontier_ece_reward": 0.0049498746637254955,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.178466796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.2359412580728531,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.33055556416511533,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0892333984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0892333984375,
|
|
"signal/advantage_abs_mean": 0.10778079181909561,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10778079181909561,
|
|
"signal/advantage_pre_scale_std": 0.17682308256626128,
|
|
"signal/advantage_std": 0.17682308256626128,
|
|
"signal/brier_reward/centered_abs_mean": 0.13626245856285096,
|
|
"signal/brier_reward/group_std_mean": 0.17717938125133514,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01703280732035637,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01703280732035637,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05800086259841919,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07589098066091537,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007250107824802399,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007250107824802399,
|
|
"signal/format_reward/centered_abs_mean": 0.015288628451526166,
|
|
"signal/format_reward/group_std_mean": 0.02700880281627178,
|
|
"signal/format_reward/group_zero_std_frac": 0.8916666865348816,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007644314225763083,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.007644314225763083,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00542384460568428,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.008771744929254055,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.708681755000725e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.708681755000725e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.08281062692403793,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.11733318269252777,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014823101460933684,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014823101460933684,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.08166301399469375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.11585556566715241,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014617678243666887,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014617678243666887,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07230544909834861,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10374643951654434,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012942674802616239,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012942674802616239,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05011899545788765,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07308039665222169,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008971299859695137,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008971299859695137,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.045297824591398236,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06296739131212234,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008108310401439666,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008108310401439666,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.08281062692403793,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.11733318269252777,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014823101460933684,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014823101460933684,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009097871743142605,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012163439951837062,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011372339678928256,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011372339678928256,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2549322254632649,
|
|
"calibration/batch_distribution_entropy": 0.7912315705537434,
|
|
"calibration/buffer_distribution_entropy": 0.7749982822016868,
|
|
"calibration/confidence_entropy": 0.4605557325249568,
|
|
"calibration/coverage@0%": 0.0026484929078014184,
|
|
"calibration/coverage@1%": 0.0026484929078014184,
|
|
"calibration/coverage@10%": 0.04064321586294653,
|
|
"calibration/coverage@15%": 0.14667673071165252,
|
|
"calibration/coverage@20%": 0.2545642286064485,
|
|
"calibration/coverage@25%": 0.39401181242340777,
|
|
"calibration/coverage@30%": 0.7916024920164326,
|
|
"calibration/coverage@5%": 0.0026484929078014184,
|
|
"calibration/ece": 0.12474794704579159,
|
|
"calibration/mean_confidence": 0.7407469510867013,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.006076388888888906,
|
|
"completions/max_length": 3656.8,
|
|
"completions/max_terminated_length": 3656.8,
|
|
"completions/mean_length": 855.488623046875,
|
|
"completions/mean_terminated_length": 860.7367919921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 319.0,
|
|
"epoch": 0.2039974500318746,
|
|
"grad_norm": 0.00048303132643923163,
|
|
"learning_rate": 3.7048192771084342e-06,
|
|
"loss": -0.0039,
|
|
"num_tokens": 175839031.0,
|
|
"reward": 1.049242663383484,
|
|
"reward_std": 0.13771760761737822,
|
|
"rewards/accuracy_reward": 0.6803819417953492,
|
|
"rewards/brier_reward": 0.7834740161895752,
|
|
"rewards/confidence_uniqueness_reward": 0.9058789372444153,
|
|
"rewards/format_reward": 0.993663203716278,
|
|
"rewards/frontier_aurc_reward": -0.004763692617416382,
|
|
"rewards/frontier_coverage_1": -0.0024880644166842105,
|
|
"rewards/frontier_coverage_10": -0.0016338142449967563,
|
|
"rewards/frontier_coverage_15": 0.00023350361734628678,
|
|
"rewards/frontier_coverage_20": 0.007665848324541003,
|
|
"rewards/frontier_coverage_25": 0.025208524614572524,
|
|
"rewards/frontier_coverage_5": -0.0024880644166842105,
|
|
"rewards/frontier_ece_reward": 0.0052955283783376215,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16981336772441863,
|
|
"signal/accuracy_reward/group_std_mean": 0.22479016780853273,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3583333373069763,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08490668386220931,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08490668386220931,
|
|
"signal/advantage_abs_mean": 0.0999684289097786,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0999684289097786,
|
|
"signal/advantage_pre_scale_std": 0.17038570940494538,
|
|
"signal/advantage_std": 0.17038570940494538,
|
|
"signal/brier_reward/centered_abs_mean": 0.12644084244966508,
|
|
"signal/brier_reward/group_std_mean": 0.16933887600898742,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015805105306208135,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015805105306208135,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.053865256160497664,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0750114805996418,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006733157020062208,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006733157020062208,
|
|
"signal/format_reward/centered_abs_mean": 0.011767578125,
|
|
"signal/format_reward/group_std_mean": 0.02580878436565399,
|
|
"signal/format_reward/group_zero_std_frac": 0.8833333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0058837890625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0058837890625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.005690837278962135,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00979206943884492,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00010186598519794643,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00010186598519794643,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.07988283634185792,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.11243547201156616,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014299027621746062,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014299027621746062,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.07791899591684341,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1098724588751793,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013947500381618738,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013947500381618738,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07281370237469673,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10328521132469178,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001303365221247077,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001303365221247077,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.050810272246599196,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07330892160534859,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009095038287341595,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009095038287341595,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04489777684211731,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06135682612657547,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008036701823584735,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008036701823584735,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.07988283634185792,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.11243547201156616,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014299027621746062,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014299027621746062,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00815775478258729,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010810710676014423,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010197193478234112,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010197193478234112,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1884037021642985,
|
|
"calibration/batch_distribution_entropy": 0.8032843437338201,
|
|
"calibration/buffer_distribution_entropy": 0.7924773042956995,
|
|
"calibration/confidence_entropy": 0.5132141674135235,
|
|
"calibration/coverage@0%": 0.00209705428128203,
|
|
"calibration/coverage@1%": 0.00209705428128203,
|
|
"calibration/coverage@10%": 0.13159094607883876,
|
|
"calibration/coverage@15%": 0.45068144103455543,
|
|
"calibration/coverage@20%": 0.6938492505588094,
|
|
"calibration/coverage@25%": 0.8097932670542966,
|
|
"calibration/coverage@30%": 0.9129219364513481,
|
|
"calibration/coverage@5%": 0.0453262209479487,
|
|
"calibration/ece": 0.07574494386976773,
|
|
"calibration/mean_confidence": 0.7050964063928427,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009375,
|
|
"completions/max_length": 3446.2,
|
|
"completions/max_terminated_length": 3446.2,
|
|
"completions/mean_length": 814.8932250976562,
|
|
"completions/mean_terminated_length": 822.6298583984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 268.6,
|
|
"epoch": 0.2159973000337496,
|
|
"grad_norm": 0.0005044998251833022,
|
|
"learning_rate": 3.5542168674698798e-06,
|
|
"loss": -0.0078,
|
|
"num_tokens": 188295273.0,
|
|
"reward": 1.048031497001648,
|
|
"reward_std": 0.13147322535514833,
|
|
"rewards/accuracy_reward": 0.6767361044883728,
|
|
"rewards/brier_reward": 0.7857626795768737,
|
|
"rewards/confidence_uniqueness_reward": 0.9275232076644897,
|
|
"rewards/format_reward": 0.990625,
|
|
"rewards/frontier_aurc_reward": -0.003035349538549781,
|
|
"rewards/frontier_coverage_1": -0.009896452794782818,
|
|
"rewards/frontier_coverage_10": -0.00977154376450926,
|
|
"rewards/frontier_coverage_15": -0.008140944095794111,
|
|
"rewards/frontier_coverage_20": 0.0010742006823420524,
|
|
"rewards/frontier_coverage_25": 0.024561950564384462,
|
|
"rewards/frontier_coverage_5": -0.009896452794782818,
|
|
"rewards/frontier_ece_reward": 0.0036846227245405315,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1624348998069763,
|
|
"signal/accuracy_reward/group_std_mean": 0.2136551856994629,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.39166666865348815,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08121744990348816,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08121744990348816,
|
|
"signal/advantage_abs_mean": 0.09799883216619491,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09799883216619491,
|
|
"signal/advantage_pre_scale_std": 0.16718345284461975,
|
|
"signal/advantage_std": 0.16718345284461975,
|
|
"signal/brier_reward/centered_abs_mean": 0.1226424291729927,
|
|
"signal/brier_reward/group_std_mean": 0.16106078028678894,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015330303646624088,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015330303646624088,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.040507809817790986,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05724046900868416,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005063476227223873,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005063476227223873,
|
|
"signal/format_reward/centered_abs_mean": 0.01504991315305233,
|
|
"signal/format_reward/group_std_mean": 0.026866191625595094,
|
|
"signal/format_reward/group_zero_std_frac": 0.8944444537162781,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007524956576526165,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.007524956576526165,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003690991410985589,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00686999736353755,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.606874376302585e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.606874376302585e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09332177340984345,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.12926909923553467,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016704596579074859,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016704596579074859,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09242226481437683,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.12810271680355073,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001654358464293182,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001654358464293182,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08840162605047226,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.12299361377954483,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00158238906878978,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00158238906878978,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.058317091315984726,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08401793241500854,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010438758763484657,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010438758763484657,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04199025183916092,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.05837389156222343,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007516254670917987,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007516254670917987,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.09332177340984345,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.12926909923553467,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016704596579074859,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016704596579074859,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007873425912111997,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01113816760480404,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009841782390139996,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009841782390139996,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2511498546221036,
|
|
"calibration/batch_distribution_entropy": 0.8472823428192587,
|
|
"calibration/buffer_distribution_entropy": 0.8292602035451393,
|
|
"calibration/confidence_entropy": 0.5268932324887918,
|
|
"calibration/coverage@0%": 0.007878203361807659,
|
|
"calibration/coverage@1%": 0.007878203361807659,
|
|
"calibration/coverage@10%": 0.20039129236704323,
|
|
"calibration/coverage@15%": 0.38163405896941305,
|
|
"calibration/coverage@20%": 0.5147368421052632,
|
|
"calibration/coverage@25%": 0.5347368421052632,
|
|
"calibration/coverage@30%": 0.5657894736842105,
|
|
"calibration/coverage@5%": 0.023141361256544504,
|
|
"calibration/ece": 0.1191731328965677,
|
|
"calibration/mean_confidence": 0.673483074709598,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0039062500000000226,
|
|
"completions/max_length": 3117.4,
|
|
"completions/max_terminated_length": 3117.4,
|
|
"completions/mean_length": 822.5976684570312,
|
|
"completions/mean_terminated_length": 825.820654296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 249.4,
|
|
"epoch": 0.22799715003562457,
|
|
"grad_norm": 0.0005109178018756211,
|
|
"learning_rate": 3.4036144578313257e-06,
|
|
"loss": -0.0023,
|
|
"num_tokens": 200863278.0,
|
|
"reward": 1.0485934257507323,
|
|
"reward_std": 0.12660266309976578,
|
|
"rewards/accuracy_reward": 0.668749988079071,
|
|
"rewards/brier_reward": 0.7837409973144531,
|
|
"rewards/confidence_uniqueness_reward": 0.9414145112037658,
|
|
"rewards/format_reward": 0.9959201335906982,
|
|
"rewards/frontier_aurc_reward": -0.003332670731469989,
|
|
"rewards/frontier_coverage_1": -0.00962460646405816,
|
|
"rewards/frontier_coverage_10": -0.009027575980871916,
|
|
"rewards/frontier_coverage_15": -0.0062865779735147955,
|
|
"rewards/frontier_coverage_20": 0.006169534660875798,
|
|
"rewards/frontier_coverage_25": 0.049424213171005246,
|
|
"rewards/frontier_coverage_5": -0.00962460646405816,
|
|
"rewards/frontier_ece_reward": 0.0023768938961438836,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16069878339767457,
|
|
"signal/accuracy_reward/group_std_mean": 0.2120576322078705,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08034939169883729,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08034939169883729,
|
|
"signal/advantage_abs_mean": 0.09407227784395218,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09407227784395218,
|
|
"signal/advantage_pre_scale_std": 0.1572835475206375,
|
|
"signal/advantage_std": 0.1572835475206375,
|
|
"signal/brier_reward/centered_abs_mean": 0.12441224157810211,
|
|
"signal/brier_reward/group_std_mean": 0.16177276968955995,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015551530197262764,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015551530197262764,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031784088164567945,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04672372043132782,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003973011020570993,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003973011020570993,
|
|
"signal/format_reward/centered_abs_mean": 0.007590060774236918,
|
|
"signal/format_reward/group_std_mean": 0.01747054308652878,
|
|
"signal/format_reward/group_zero_std_frac": 0.9166666865348816,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003795030387118459,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.003795030387118459,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.004074021661654115,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.007213028613477945,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.29249841242563e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.29249841242563e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11047781854867936,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1514023333787918,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001977552776224911,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001977552776224911,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10917936712503433,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.14975302815437316,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001954310527071357,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001954310527071357,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10132132470607758,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13981849551200867,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018136516213417054,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018136516213417054,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06076301485300064,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08724861890077591,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010876578977331518,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010876578977331518,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06519225090742112,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08802225440740585,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011669412604533135,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011669412604533135,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11047781854867936,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1514023333787918,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001977552776224911,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001977552776224911,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009134245105087757,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01337057575583458,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011417806381359696,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011417806381359696,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20278364231917356,
|
|
"calibration/batch_distribution_entropy": 0.8280205913017324,
|
|
"calibration/buffer_distribution_entropy": 0.8403697455023262,
|
|
"calibration/confidence_entropy": 0.5017551241839072,
|
|
"calibration/coverage@0%": 0.004732741740615757,
|
|
"calibration/coverage@1%": 0.004732741740615757,
|
|
"calibration/coverage@10%": 0.15281197352277737,
|
|
"calibration/coverage@15%": 0.4388613860856143,
|
|
"calibration/coverage@20%": 0.591331061916468,
|
|
"calibration/coverage@25%": 0.7446475195822455,
|
|
"calibration/coverage@30%": 0.8691493747423389,
|
|
"calibration/coverage@5%": 0.02478023580385785,
|
|
"calibration/ece": 0.1056119338962247,
|
|
"calibration/mean_confidence": 0.7101517376892353,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.006076388888888884,
|
|
"completions/max_length": 3497.0,
|
|
"completions/max_terminated_length": 3497.0,
|
|
"completions/mean_length": 854.01025390625,
|
|
"completions/mean_terminated_length": 859.19150390625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 278.8,
|
|
"epoch": 0.23999700003749952,
|
|
"grad_norm": 0.0004937337362207472,
|
|
"learning_rate": 3.2530120481927713e-06,
|
|
"loss": -0.0045,
|
|
"num_tokens": 213800548.0,
|
|
"reward": 1.0567084312438966,
|
|
"reward_std": 0.1290334552526474,
|
|
"rewards/accuracy_reward": 0.6779513835906983,
|
|
"rewards/brier_reward": 0.8075439810752869,
|
|
"rewards/confidence_uniqueness_reward": 0.9373886227607727,
|
|
"rewards/format_reward": 0.993663203716278,
|
|
"rewards/frontier_aurc_reward": -0.0039381059817969796,
|
|
"rewards/frontier_coverage_1": 0.012442300096154213,
|
|
"rewards/frontier_coverage_10": 0.013322338275611401,
|
|
"rewards/frontier_coverage_15": 0.015895536914467812,
|
|
"rewards/frontier_coverage_20": 0.022733899392187597,
|
|
"rewards/frontier_coverage_25": 0.05824657455086708,
|
|
"rewards/frontier_coverage_5": 0.012462735641747713,
|
|
"rewards/frontier_ece_reward": 0.0034934583585709334,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16227213740348817,
|
|
"signal/accuracy_reward/group_std_mean": 0.21252356767654418,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.397222226858139,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08113606870174409,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08113606870174409,
|
|
"signal/advantage_abs_mean": 0.09522689133882523,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09522689133882523,
|
|
"signal/advantage_pre_scale_std": 0.16290957629680633,
|
|
"signal/advantage_std": 0.16290957629680633,
|
|
"signal/brier_reward/centered_abs_mean": 0.11964384317398072,
|
|
"signal/brier_reward/group_std_mean": 0.15935627818107606,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01495548039674759,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01495548039674759,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03456774652004242,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.052286341041326526,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004320968315005303,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004320968315005303,
|
|
"signal/format_reward/centered_abs_mean": 0.011051432322710752,
|
|
"signal/format_reward/group_std_mean": 0.024242669716477393,
|
|
"signal/format_reward/group_zero_std_frac": 0.8861111164093017,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005525716161355376,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.005525716161355376,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.005287631414830685,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.009730769135057927,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.464860195294023e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.464860195294023e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1009969249367714,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.13951509296894074,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018078448716551065,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018078448716551065,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09884380400180817,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.136749067902565,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017693039961159229,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017693039961159229,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0821550577878952,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11540477871894836,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014705754816532134,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014705754816532134,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05389119237661362,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07688918858766555,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009646523278206587,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009646523278206587,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.057782044261693956,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07765513509511948,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010342985624447465,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010342985624447465,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10051819980144501,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.13889843970537186,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017992756562307476,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017992756562307476,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006692274007946253,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009778609126806259,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008365342509932816,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008365342509932816,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.23999700003749952,
|
|
"eval_calibration/aurc": 0.19508682084988796,
|
|
"eval_calibration/batch_distribution_entropy": 0.7643132158883287,
|
|
"eval_calibration/buffer_distribution_entropy": 0.8500895559339968,
|
|
"eval_calibration/confidence_entropy": 0.4922978578588346,
|
|
"eval_calibration/coverage@0%": 0.16515456989247312,
|
|
"eval_calibration/coverage@1%": 0.16515456989247312,
|
|
"eval_calibration/coverage@10%": 0.24579973118279572,
|
|
"eval_calibration/coverage@15%": 0.45060483870967744,
|
|
"eval_calibration/coverage@20%": 0.6233198924731183,
|
|
"eval_calibration/coverage@25%": 0.8323252688172044,
|
|
"eval_calibration/coverage@30%": 0.9322916666666666,
|
|
"eval_calibration/coverage@5%": 0.18128360215053763,
|
|
"eval_calibration/ece": 0.13874366599462365,
|
|
"eval_calibration/mean_confidence": 0.7162100638440861,
|
|
"eval_completions/clipped_ratio": 0.006076388888888895,
|
|
"eval_completions/max_length": 2862.6666666666665,
|
|
"eval_completions/max_terminated_length": 2862.6666666666665,
|
|
"eval_completions/mean_length": 856.265635172526,
|
|
"eval_completions/mean_terminated_length": 861.4902547200521,
|
|
"eval_completions/min_length": 122.5,
|
|
"eval_completions/min_terminated_length": 338.1666666666667,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 213800548.0,
|
|
"eval_reward": 1.040560742219289,
|
|
"eval_reward_std": 0.26353143403927487,
|
|
"eval_rewards/accuracy_reward": 0.667534718910853,
|
|
"eval_rewards/brier_reward": 0.7879191040992737,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8808565934499105,
|
|
"eval_rewards/format_reward": 0.9930555621782938,
|
|
"eval_rewards/frontier_aurc_reward": -0.004968842452702423,
|
|
"eval_rewards/frontier_coverage_1": 0.004989876101414363,
|
|
"eval_rewards/frontier_coverage_10": 0.00570684849905471,
|
|
"eval_rewards/frontier_coverage_15": 0.007359214670335253,
|
|
"eval_rewards/frontier_coverage_20": 0.015881775800759595,
|
|
"eval_rewards/frontier_coverage_25": 0.04367877449840307,
|
|
"eval_rewards/frontier_coverage_5": 0.004989876101414363,
|
|
"eval_rewards/frontier_ece_reward": 0.002231398791385194,
|
|
"eval_runtime": 189.412,
|
|
"eval_samples_per_second": 5.279,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4254014740387599,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.46698982020219165,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21270073701937994,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21270073701937994,
|
|
"eval_signal/advantage_abs_mean": 0.23313634594281515,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.23313634594281515,
|
|
"eval_signal/advantage_pre_scale_std": 0.26145924379428226,
|
|
"eval_signal/advantage_std": 0.26145924379428226,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.20662419497966766,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2611635724703471,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025828024372458458,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.025828024372458458,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05225155937174956,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07714233547449112,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006531444921468695,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006531444921468695,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.013346354166666666,
|
|
"eval_signal/format_reward/group_std_mean": 0.03629430073002974,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.8055555721124014,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.006673177083333333,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.006673177083333333,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.008770209504291415,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.017012828961014748,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00015698675148693533,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00015698675148693533,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.11700086171428363,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.19166247049967447,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002094315461969624,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002094315461969624,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.11191717411080997,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.18493242065111795,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020033172719801464,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020033172719801464,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.08915293340881665,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.15359986076752344,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015958373939308028,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015958373939308028,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.05830358279248079,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.09689446166157722,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010436340235173702,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010436340235173702,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.08435119688510895,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.10973832756280899,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015098864290242393,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015098864290242393,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.11700086171428363,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.19166247049967447,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002094315461969624,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002094315461969624,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.006453142423803608,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.010442674780885378,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000806642802975451,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000806642802975451,
|
|
"eval_steps_per_second": 0.032,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3511401691340529,
|
|
"calibration/batch_distribution_entropy": 0.757278262607608,
|
|
"calibration/buffer_distribution_entropy": 0.8430223025636077,
|
|
"calibration/confidence_entropy": 0.4727277883725902,
|
|
"calibration/coverage@0%": 0.0010416666666666667,
|
|
"calibration/coverage@1%": 0.0010416666666666667,
|
|
"calibration/coverage@10%": 0.10262061403508774,
|
|
"calibration/coverage@15%": 0.13788377192982457,
|
|
"calibration/coverage@20%": 0.19810855263157895,
|
|
"calibration/coverage@25%": 0.3067105263157895,
|
|
"calibration/coverage@30%": 0.3810792349726776,
|
|
"calibration/coverage@5%": 0.033146929824561404,
|
|
"calibration/ece": 0.18226900464138213,
|
|
"calibration/mean_confidence": 0.7595750235377008,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010156249999999978,
|
|
"completions/max_length": 3543.0,
|
|
"completions/max_terminated_length": 3543.0,
|
|
"completions/mean_length": 862.9362915039062,
|
|
"completions/mean_terminated_length": 871.67646484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 279.2,
|
|
"epoch": 0.2519968500393745,
|
|
"grad_norm": 0.0005761328502558172,
|
|
"learning_rate": 3.1024096385542172e-06,
|
|
"loss": -0.0081,
|
|
"num_tokens": 226818438.0,
|
|
"reward": 1.048232102394104,
|
|
"reward_std": 0.1332755818963051,
|
|
"rewards/accuracy_reward": 0.6735243082046509,
|
|
"rewards/brier_reward": 0.7883717060089112,
|
|
"rewards/confidence_uniqueness_reward": 0.9287825942039489,
|
|
"rewards/format_reward": 0.9895833253860473,
|
|
"rewards/frontier_aurc_reward": -0.00578044205904007,
|
|
"rewards/frontier_coverage_1": 0.007354969310108572,
|
|
"rewards/frontier_coverage_10": 0.007265249360352754,
|
|
"rewards/frontier_coverage_15": 0.009339299611747265,
|
|
"rewards/frontier_coverage_20": 0.018504777178168296,
|
|
"rewards/frontier_coverage_25": 0.051360327005386355,
|
|
"rewards/frontier_coverage_5": 0.007354969310108572,
|
|
"rewards/frontier_ece_reward": 0.0026106106583029033,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15887044221162797,
|
|
"signal/accuracy_reward/group_std_mean": 0.21121549904346465,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.397222226858139,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07943522110581398,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07943522110581398,
|
|
"signal/advantage_abs_mean": 0.09754386842250824,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09754386842250824,
|
|
"signal/advantage_pre_scale_std": 0.16947126388549805,
|
|
"signal/advantage_std": 0.16947126388549805,
|
|
"signal/brier_reward/centered_abs_mean": 0.1252099484205246,
|
|
"signal/brier_reward/group_std_mean": 0.16566490530967712,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015651243552565575,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015651243552565575,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04011865481734276,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.057522188127040866,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005014831852167845,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005014831852167845,
|
|
"signal/format_reward/centered_abs_mean": 0.016981336660683154,
|
|
"signal/format_reward/group_std_mean": 0.029765255004167556,
|
|
"signal/format_reward/group_zero_std_frac": 0.8861111164093017,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008490668330341577,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008490668330341577,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.007304486818611622,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.013138260692358017,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00013075030146865175,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00013075030146865175,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0824627086520195,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1177333876490593,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014760824386030435,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014760824386030435,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.08060138821601867,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.11523205190896987,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014427647460252047,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014427647460252047,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07278760075569153,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10448294579982757,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013028981164097786,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013028981164097786,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.04164608493447304,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.060693875700235364,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007454649079591036,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007454649079591036,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05113328471779823,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06808223649859428,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009152857935987412,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009152857935987412,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0824627086520195,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1177333876490593,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014760824386030435,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014760824386030435,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004956904333084822,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006825331319123507,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006196130416356027,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006196130416356027,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20816277950066214,
|
|
"calibration/batch_distribution_entropy": 0.7404046559516211,
|
|
"calibration/buffer_distribution_entropy": 0.8073233848731809,
|
|
"calibration/confidence_entropy": 0.4333681136935855,
|
|
"calibration/coverage@0%": 0.0010416666666666667,
|
|
"calibration/coverage@1%": 0.0010416666666666667,
|
|
"calibration/coverage@10%": 0.16827268276762403,
|
|
"calibration/coverage@15%": 0.3403001671154977,
|
|
"calibration/coverage@20%": 0.48926075741938135,
|
|
"calibration/coverage@25%": 0.6757180156657964,
|
|
"calibration/coverage@30%": 0.8710182767624021,
|
|
"calibration/coverage@5%": 0.03125,
|
|
"calibration/ece": 0.1267235183366333,
|
|
"calibration/mean_confidence": 0.7824960816740748,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.008506944444444465,
|
|
"completions/max_length": 3628.6,
|
|
"completions/max_terminated_length": 3628.6,
|
|
"completions/mean_length": 902.2748291015625,
|
|
"completions/mean_terminated_length": 910.0505859375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 308.2,
|
|
"epoch": 0.2639967000412495,
|
|
"grad_norm": 0.0005406757700257003,
|
|
"learning_rate": 2.9518072289156627e-06,
|
|
"loss": -0.0053,
|
|
"num_tokens": 240321092.0,
|
|
"reward": 1.068948006629944,
|
|
"reward_std": 0.13193423300981522,
|
|
"rewards/accuracy_reward": 0.7086805462837219,
|
|
"rewards/brier_reward": 0.8089686393737793,
|
|
"rewards/confidence_uniqueness_reward": 0.9270894527435303,
|
|
"rewards/format_reward": 0.9913194417953491,
|
|
"rewards/frontier_aurc_reward": -0.004053758783265948,
|
|
"rewards/frontier_coverage_1": 0.004211192601360381,
|
|
"rewards/frontier_coverage_10": 0.004211192601360381,
|
|
"rewards/frontier_coverage_15": 0.0068603390827775,
|
|
"rewards/frontier_coverage_20": 0.0205037584528327,
|
|
"rewards/frontier_coverage_25": 0.050310605019330976,
|
|
"rewards/frontier_coverage_5": 0.004211192601360381,
|
|
"rewards/frontier_ece_reward": 0.0031742799561470746,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15706380307674409,
|
|
"signal/accuracy_reward/group_std_mean": 0.2148415267467499,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.36111111044883726,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07853190153837204,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07853190153837204,
|
|
"signal/advantage_abs_mean": 0.09467502385377884,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09467502385377884,
|
|
"signal/advantage_pre_scale_std": 0.16729762852191926,
|
|
"signal/advantage_std": 0.16729762852191926,
|
|
"signal/brier_reward/centered_abs_mean": 0.1214766725897789,
|
|
"signal/brier_reward/group_std_mean": 0.16396571099758148,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015184584073722363,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015184584073722363,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.038360346108675,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05404561161994934,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004795043263584375,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004795043263584375,
|
|
"signal/format_reward/centered_abs_mean": 0.01239149309694767,
|
|
"signal/format_reward/group_std_mean": 0.023483334854245187,
|
|
"signal/format_reward/group_zero_std_frac": 0.9000000119209289,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006195746548473835,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.006195746548473835,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.005569100752472878,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.010228174738585949,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.968689846573398e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.968689846573398e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.07596084028482437,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1094392940402031,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013596989447250962,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013596989447250962,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.07596084028482437,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1094392940402031,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013596989447250962,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013596989447250962,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0660040944814682,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09634122103452683,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011814731871709228,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011814731871709228,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.03823840469121933,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.055505610257387164,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006844674120657146,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006844674120657146,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04834548756480217,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06364585980772972,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008653842494823038,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008653842494823038,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.07596084028482437,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1094392940402031,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013596989447250962,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013596989447250962,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004714849684387445,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006203057337552309,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005893562105484307,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005893562105484307,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3535007824326083,
|
|
"calibration/batch_distribution_entropy": 0.7879573423744334,
|
|
"calibration/buffer_distribution_entropy": 0.7750194996530027,
|
|
"calibration/confidence_entropy": 0.46526847791535486,
|
|
"calibration/coverage@0%": 0.003172045997444586,
|
|
"calibration/coverage@1%": 0.003172045997444586,
|
|
"calibration/coverage@10%": 0.003172045997444586,
|
|
"calibration/coverage@15%": 0.056895450252763734,
|
|
"calibration/coverage@20%": 0.15955502472084884,
|
|
"calibration/coverage@25%": 0.2010443864229765,
|
|
"calibration/coverage@30%": 0.4401978255764156,
|
|
"calibration/coverage@5%": 0.003172045997444586,
|
|
"calibration/ece": 0.22863723387297646,
|
|
"calibration/mean_confidence": 0.7457530588732254,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010416666666666675,
|
|
"completions/max_length": 3994.0,
|
|
"completions/max_terminated_length": 3994.0,
|
|
"completions/mean_length": 954.9671020507812,
|
|
"completions/mean_terminated_length": 965.1707153320312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 328.0,
|
|
"epoch": 0.27599655004312446,
|
|
"grad_norm": 0.0004946527187712491,
|
|
"learning_rate": 2.8012048192771087e-06,
|
|
"loss": -0.0074,
|
|
"num_tokens": 254401513.0,
|
|
"reward": 1.04098482131958,
|
|
"reward_std": 0.13563843965530395,
|
|
"rewards/accuracy_reward": 0.6626736164093018,
|
|
"rewards/brier_reward": 0.7801254391670227,
|
|
"rewards/confidence_uniqueness_reward": 0.924898338317871,
|
|
"rewards/format_reward": 0.9892361044883728,
|
|
"rewards/frontier_aurc_reward": -0.004975694324821233,
|
|
"rewards/frontier_coverage_1": 0.00669761549361283,
|
|
"rewards/frontier_coverage_10": 0.00669761549361283,
|
|
"rewards/frontier_coverage_15": 0.0074621538631618025,
|
|
"rewards/frontier_coverage_20": 0.016340048145502807,
|
|
"rewards/frontier_coverage_25": 0.046477542445063594,
|
|
"rewards/frontier_coverage_5": 0.00669761549361283,
|
|
"rewards/frontier_ece_reward": 0.002986938552930951,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16362847089767457,
|
|
"signal/accuracy_reward/group_std_mean": 0.2102369487285614,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08181423544883729,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08181423544883729,
|
|
"signal/advantage_abs_mean": 0.10116954147815704,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10116954147815704,
|
|
"signal/advantage_pre_scale_std": 0.1757548063993454,
|
|
"signal/advantage_std": 0.1757548063993454,
|
|
"signal/brier_reward/centered_abs_mean": 0.13105646818876265,
|
|
"signal/brier_reward/group_std_mean": 0.1718491792678833,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01638205852359533,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01638205852359533,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04196493178606033,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06228391453623772,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0052456164732575415,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0052456164732575415,
|
|
"signal/format_reward/centered_abs_mean": 0.016406250139698385,
|
|
"signal/format_reward/group_std_mean": 0.0324904628098011,
|
|
"signal/format_reward/group_zero_std_frac": 0.8611111164093017,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008203125069849193,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008203125069849193,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.006411750707775355,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.011484375223517418,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0001147703340393491,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0001147703340393491,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.07490146160125732,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1070878341794014,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013407361460849644,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013407361460849644,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.07490146160125732,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1070878341794014,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013407361460849644,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013407361460849644,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06685705333948136,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09658489525318145,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011967412428930402,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011967412428930402,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.040889284759759906,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.05778271481394768,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000731918157543987,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000731918157543987,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.055317191779613493,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07240601480007172,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009901776560582221,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009901776560582221,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.07490146160125732,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1070878341794014,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013407361460849644,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013407361460849644,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005480154789984227,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007094941008836031,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006850193487480283,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006850193487480283,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27590543776197624,
|
|
"calibration/batch_distribution_entropy": 0.7837476364249507,
|
|
"calibration/buffer_distribution_entropy": 0.7804702805505999,
|
|
"calibration/confidence_entropy": 0.4697839469577409,
|
|
"calibration/coverage@0%": 0.026063424717145345,
|
|
"calibration/coverage@1%": 0.026063424717145345,
|
|
"calibration/coverage@10%": 0.06167007814052618,
|
|
"calibration/coverage@15%": 0.14026525333566334,
|
|
"calibration/coverage@20%": 0.47490888816362054,
|
|
"calibration/coverage@25%": 0.5881105852915579,
|
|
"calibration/coverage@30%": 0.6391998476936467,
|
|
"calibration/coverage@5%": 0.03283425805047868,
|
|
"calibration/ece": 0.1501559543471629,
|
|
"calibration/mean_confidence": 0.7493281618142615,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00737847222222221,
|
|
"completions/max_length": 3241.2,
|
|
"completions/max_terminated_length": 3241.2,
|
|
"completions/mean_length": 932.678564453125,
|
|
"completions/mean_terminated_length": 939.5830688476562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 352.4,
|
|
"epoch": 0.28799640004499943,
|
|
"grad_norm": 0.00040981321944855154,
|
|
"learning_rate": 2.6506024096385547e-06,
|
|
"loss": -0.0061,
|
|
"num_tokens": 268227826.0,
|
|
"reward": 1.0591084241867066,
|
|
"reward_std": 0.13269660919904708,
|
|
"rewards/accuracy_reward": 0.6907986164093017,
|
|
"rewards/brier_reward": 0.7958985686302185,
|
|
"rewards/confidence_uniqueness_reward": 0.9336257338523865,
|
|
"rewards/format_reward": 0.9923611044883728,
|
|
"rewards/frontier_aurc_reward": -0.003998850425705314,
|
|
"rewards/frontier_coverage_1": -0.00013376011047512294,
|
|
"rewards/frontier_coverage_10": -0.00013376011047512294,
|
|
"rewards/frontier_coverage_15": 0.0030363661935552955,
|
|
"rewards/frontier_coverage_20": 0.012066485453397035,
|
|
"rewards/frontier_coverage_25": 0.03752352148294449,
|
|
"rewards/frontier_coverage_5": -0.00013376011047512294,
|
|
"rewards/frontier_ece_reward": 0.00379801276139915,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1576171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.20998079478740692,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07880859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07880859375,
|
|
"signal/advantage_abs_mean": 0.09780252277851105,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09780252277851105,
|
|
"signal/advantage_pre_scale_std": 0.1692167788743973,
|
|
"signal/advantage_std": 0.1692167788743973,
|
|
"signal/brier_reward/centered_abs_mean": 0.127366141974926,
|
|
"signal/brier_reward/group_std_mean": 0.16660855412483216,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01592076774686575,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01592076774686575,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0357662245631218,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05192890390753746,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004470778070390225,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004470778070390225,
|
|
"signal/format_reward/centered_abs_mean": 0.01287977434694767,
|
|
"signal/format_reward/group_std_mean": 0.025085731595754623,
|
|
"signal/format_reward/group_zero_std_frac": 0.8916666746139527,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006439887173473835,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.006439887173473835,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00548218721523881,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.009825864806771278,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.813114884309471e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.813114884309471e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.07798316925764084,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.11075976490974426,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013958987081423401,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013958987081423401,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.07798316925764084,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.11075976490974426,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013958987081423401,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013958987081423401,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06656168177723884,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09562954902648926,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011914541013538838,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011914541013538838,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.04737524390220642,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06730483770370484,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008480168529786169,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008480168529786169,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04951869696378708,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06624811142683029,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008863846655003726,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008863846655003726,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.07798316925764084,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.11075976490974426,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013958987081423401,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013958987081423401,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0061300666071474556,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007925087120383978,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007662583258934319,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007662583258934319,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2490672232995655,
|
|
"calibration/batch_distribution_entropy": 0.7568955546189245,
|
|
"calibration/buffer_distribution_entropy": 0.7913037149729222,
|
|
"calibration/confidence_entropy": 0.46519046009015774,
|
|
"calibration/coverage@0%": 0.0010416666666666667,
|
|
"calibration/coverage@1%": 0.0010416666666666667,
|
|
"calibration/coverage@10%": 0.012030677655677656,
|
|
"calibration/coverage@15%": 0.28572573260073264,
|
|
"calibration/coverage@20%": 0.39451121794871796,
|
|
"calibration/coverage@25%": 0.4653273809523809,
|
|
"calibration/coverage@30%": 0.8012541214768696,
|
|
"calibration/coverage@5%": 0.0010416666666666667,
|
|
"calibration/ece": 0.10894207814504417,
|
|
"calibration/mean_confidence": 0.7644244479942441,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.006944444444444442,
|
|
"completions/max_length": 3704.6,
|
|
"completions/max_terminated_length": 3704.6,
|
|
"completions/mean_length": 894.88994140625,
|
|
"completions/mean_terminated_length": 901.1129150390625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 334.2,
|
|
"epoch": 0.2999962500468744,
|
|
"grad_norm": 0.000503013376146555,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": -0.0061,
|
|
"num_tokens": 281654622.0,
|
|
"reward": 1.1232542037963866,
|
|
"reward_std": 0.14578927159309388,
|
|
"rewards/accuracy_reward": 0.6760416626930237,
|
|
"rewards/brier_reward": 0.7950587749481202,
|
|
"rewards/confidence_uniqueness_reward": 0.9364853739738465,
|
|
"rewards/format_reward": 0.9928819417953492,
|
|
"rewards/frontier_aurc_reward": 0.27824820578098297,
|
|
"rewards/frontier_coverage_1": 0.28667475739493964,
|
|
"rewards/frontier_coverage_10": 0.2890449246915523,
|
|
"rewards/frontier_coverage_15": 0.2900581806898117,
|
|
"rewards/frontier_coverage_20": 0.3047246981412172,
|
|
"rewards/frontier_coverage_25": 0.32476295456290244,
|
|
"rewards/frontier_coverage_5": 0.2867823286447674,
|
|
"rewards/frontier_ece_reward": 0.28376022321172056,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1623372405767441,
|
|
"signal/accuracy_reward/group_std_mean": 0.21117228865623475,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4055555522441864,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08116862028837205,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08116862028837205,
|
|
"signal/advantage_abs_mean": 0.11012209504842758,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11012209504842758,
|
|
"signal/advantage_pre_scale_std": 0.1846640706062317,
|
|
"signal/advantage_std": 0.1846640706062317,
|
|
"signal/brier_reward/centered_abs_mean": 0.12385518252849578,
|
|
"signal/brier_reward/group_std_mean": 0.16202466189861298,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015481897816061973,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015481897816061973,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.033407071605324745,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04837455451488495,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004175883950665593,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004175883950665593,
|
|
"signal/format_reward/centered_abs_mean": 0.010980902891606092,
|
|
"signal/format_reward/group_std_mean": 0.021640064381062984,
|
|
"signal/format_reward/group_zero_std_frac": 0.9055555582046508,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005490451445803046,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.005490451445803046,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.06701602544635535,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.08801630456000567,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0011995868175290526,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0011995868175290526,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11247340589761734,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.14900651276111604,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002013273839838803,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002013273839838803,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10776370391249657,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1427865594625473,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019289702409878372,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019289702409878372,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10236316695809364,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1350301742553711,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001832300634123385,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001832300634123385,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09971490427851677,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1307316705584526,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001784896687604487,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001784896687604487,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10168659240007401,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13174740523099898,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018201899249106646,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018201899249106646,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11220043301582336,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1486586645245552,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002008387632668018,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002008387632668018,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06663836631923914,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08546235710382462,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008329795789904892,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008329795789904892,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.30066707983754387,
|
|
"calibration/batch_distribution_entropy": 0.8246395962709563,
|
|
"calibration/buffer_distribution_entropy": 0.7884855069320794,
|
|
"calibration/confidence_entropy": 0.4842545938574242,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.071875,
|
|
"calibration/coverage@15%": 0.17053008050478677,
|
|
"calibration/coverage@20%": 0.2885958243538204,
|
|
"calibration/coverage@25%": 0.3971019499469028,
|
|
"calibration/coverage@30%": 0.5022268091824544,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.13698749449646505,
|
|
"calibration/mean_confidence": 0.7181843915855899,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013281249999999977,
|
|
"completions/max_length": 3485.4,
|
|
"completions/max_terminated_length": 3485.4,
|
|
"completions/mean_length": 880.1,
|
|
"completions/mean_terminated_length": 892.1025756835937,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 256.2,
|
|
"epoch": 0.3119961000487494,
|
|
"grad_norm": 0.0005537553806789219,
|
|
"learning_rate": 2.349397590361446e-06,
|
|
"loss": -0.0116,
|
|
"num_tokens": 294918174.0,
|
|
"reward": 1.0387332916259766,
|
|
"reward_std": 0.14156851768493653,
|
|
"rewards/accuracy_reward": 0.6585069417953491,
|
|
"rewards/brier_reward": 0.7857657551765442,
|
|
"rewards/confidence_uniqueness_reward": 0.9319911122322082,
|
|
"rewards/format_reward": 0.985243046283722,
|
|
"rewards/frontier_aurc_reward": -0.005019997013732791,
|
|
"rewards/frontier_coverage_1": 0.005013994639739394,
|
|
"rewards/frontier_coverage_10": 0.005232905806042254,
|
|
"rewards/frontier_coverage_15": 0.009756483789533376,
|
|
"rewards/frontier_coverage_20": 0.019731305353343487,
|
|
"rewards/frontier_coverage_25": 0.056990716606378555,
|
|
"rewards/frontier_coverage_5": 0.005129149602726102,
|
|
"rewards/frontier_ece_reward": 0.0032425791956484317,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17151692509651184,
|
|
"signal/accuracy_reward/group_std_mean": 0.22183212041854858,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38888890147209165,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08575846254825592,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08575846254825592,
|
|
"signal/advantage_abs_mean": 0.10706333220005035,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10706333220005035,
|
|
"signal/advantage_pre_scale_std": 0.1797438532114029,
|
|
"signal/advantage_std": 0.1797438532114029,
|
|
"signal/brier_reward/centered_abs_mean": 0.13050457537174226,
|
|
"signal/brier_reward/group_std_mean": 0.17029784321784974,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016313071921467782,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016313071921467782,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0440841481089592,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06221437379717827,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0055105185136199,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0055105185136199,
|
|
"signal/format_reward/centered_abs_mean": 0.02253689235076308,
|
|
"signal/format_reward/group_std_mean": 0.036004848405718805,
|
|
"signal/format_reward/group_zero_std_frac": 0.8694444417953491,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01126844617538154,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01126844617538154,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.006880612950772047,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.012929960340261459,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00012316296924836935,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00012316296924836935,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09724105149507523,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.13408505618572236,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001740614790469408,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001740614790469408,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09663455486297608,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1333113506436348,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001729758526198566,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001729758526198566,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0839831992983818,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11663785576820374,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015032992465421557,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015032992465421557,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.062086595594882964,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08598276525735855,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011113500688225031,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011113500688225031,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06557924449443817,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08785432279109955,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001173868461046368,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001173868461046368,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.09715530872344971,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.13397427797317504,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017390799708664416,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017390799708664416,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007921741157770158,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010610108450055122,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009902176447212697,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009902176447212697,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2678442784741456,
|
|
"calibration/batch_distribution_entropy": 0.7911120701942849,
|
|
"calibration/buffer_distribution_entropy": 0.8070088429755357,
|
|
"calibration/confidence_entropy": 0.4452843908823473,
|
|
"calibration/coverage@0%": 0.011491141732283465,
|
|
"calibration/coverage@1%": 0.011491141732283465,
|
|
"calibration/coverage@10%": 0.2167979002624672,
|
|
"calibration/coverage@15%": 0.2167979002624672,
|
|
"calibration/coverage@20%": 0.2797900262467191,
|
|
"calibration/coverage@25%": 0.5111410415803288,
|
|
"calibration/coverage@30%": 0.58236026212184,
|
|
"calibration/coverage@5%": 0.041699475065616796,
|
|
"calibration/ece": 0.16236960911891057,
|
|
"calibration/mean_confidence": 0.7544395301047242,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.008506944444444442,
|
|
"completions/max_length": 3544.8,
|
|
"completions/max_terminated_length": 3544.8,
|
|
"completions/mean_length": 838.1894897460937,
|
|
"completions/mean_terminated_length": 845.3768188476563,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 240.4,
|
|
"epoch": 0.32399595005062437,
|
|
"grad_norm": 0.0006604056106880307,
|
|
"learning_rate": 2.1987951807228917e-06,
|
|
"loss": -0.0072,
|
|
"num_tokens": 307667141.0,
|
|
"reward": 1.0535424947738647,
|
|
"reward_std": 0.12735026627779006,
|
|
"rewards/accuracy_reward": 0.6780381917953491,
|
|
"rewards/brier_reward": 0.7968827724456787,
|
|
"rewards/confidence_uniqueness_reward": 0.9389971017837524,
|
|
"rewards/format_reward": 0.9914930462837219,
|
|
"rewards/frontier_aurc_reward": -0.00391119560226798,
|
|
"rewards/frontier_coverage_1": 0.003469866211526096,
|
|
"rewards/frontier_coverage_10": 0.003514829161576927,
|
|
"rewards/frontier_coverage_15": 0.0063144458457827565,
|
|
"rewards/frontier_coverage_20": 0.016259027272462846,
|
|
"rewards/frontier_coverage_25": 0.04592671990394592,
|
|
"rewards/frontier_coverage_5": 0.003469866211526096,
|
|
"rewards/frontier_ece_reward": 0.0035883668344467877,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15276149958372115,
|
|
"signal/accuracy_reward/group_std_mean": 0.20271962285041809,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.42500001192092896,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07638074979186057,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07638074979186057,
|
|
"signal/advantage_abs_mean": 0.09290645867586136,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09290645867586136,
|
|
"signal/advantage_pre_scale_std": 0.16355342268943787,
|
|
"signal/advantage_std": 0.16355342268943787,
|
|
"signal/brier_reward/centered_abs_mean": 0.1277672603726387,
|
|
"signal/brier_reward/group_std_mean": 0.1687987834215164,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015970907546579836,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015970907546579836,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.037500524520874025,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.057621393352746964,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004687565565109253,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004687565565109253,
|
|
"signal/format_reward/centered_abs_mean": 0.01510416679084301,
|
|
"signal/format_reward/group_std_mean": 0.03117924928665161,
|
|
"signal/format_reward/group_zero_std_frac": 0.8611111044883728,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007552083395421505,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.007552083395421505,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0054148219525814055,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.009956344775855542,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.69253116636537e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.69253116636537e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10388213694095612,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1445908635854721,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018594901077449322,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018594901077449322,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10351476073265076,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.14410740435123442,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018529141088947654,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018529141088947654,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09183897525072098,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.12883895337581636,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016439176397398114,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016439176397398114,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.054963266104459764,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07745110541582108,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009838424040935934,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009838424040935934,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05390466451644897,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07155173420906066,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009648935054428875,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009648935054428875,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10388213694095612,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1445908635854721,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018594901077449322,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018594901077449322,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006833387818187475,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009374895878136159,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008541734772734344,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008541734772734344,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19513551200965937,
|
|
"calibration/batch_distribution_entropy": 0.8311802037571339,
|
|
"calibration/buffer_distribution_entropy": 0.8109006063782959,
|
|
"calibration/confidence_entropy": 0.46980175720261697,
|
|
"calibration/coverage@0%": 0.006303574312695471,
|
|
"calibration/coverage@1%": 0.006303574312695471,
|
|
"calibration/coverage@10%": 0.15071783948397832,
|
|
"calibration/coverage@15%": 0.3612107233516245,
|
|
"calibration/coverage@20%": 0.5503501102900245,
|
|
"calibration/coverage@25%": 0.8174175607275824,
|
|
"calibration/coverage@30%": 0.9399248776958611,
|
|
"calibration/coverage@5%": 0.006303574312695471,
|
|
"calibration/ece": 0.09047163423754609,
|
|
"calibration/mean_confidence": 0.7242396246544298,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011545138888888884,
|
|
"completions/max_length": 3611.6,
|
|
"completions/max_terminated_length": 3611.6,
|
|
"completions/mean_length": 820.3759643554688,
|
|
"completions/mean_terminated_length": 829.886279296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 269.6,
|
|
"epoch": 0.33599580005249935,
|
|
"grad_norm": 0.00045778934145346284,
|
|
"learning_rate": 2.0481927710843377e-06,
|
|
"loss": -0.0093,
|
|
"num_tokens": 320222096.0,
|
|
"reward": 1.0436462879180908,
|
|
"reward_std": 0.13221458494663238,
|
|
"rewards/accuracy_reward": 0.6639757037162781,
|
|
"rewards/brier_reward": 0.7856175661087036,
|
|
"rewards/confidence_uniqueness_reward": 0.9376022934913635,
|
|
"rewards/format_reward": 0.9881944537162781,
|
|
"rewards/frontier_aurc_reward": -0.004951791558414698,
|
|
"rewards/frontier_coverage_1": 0.009559250064194202,
|
|
"rewards/frontier_coverage_10": 0.009636179637163877,
|
|
"rewards/frontier_coverage_15": 0.010256279539316893,
|
|
"rewards/frontier_coverage_20": 0.019052751082926988,
|
|
"rewards/frontier_coverage_25": 0.04648147448897362,
|
|
"rewards/frontier_coverage_5": 0.009559250064194202,
|
|
"rewards/frontier_ece_reward": 0.0030080639291554688,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1520562082529068,
|
|
"signal/accuracy_reward/group_std_mean": 0.20364203155040742,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0760281041264534,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0760281041264534,
|
|
"signal/advantage_abs_mean": 0.09611336141824722,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09611336141824722,
|
|
"signal/advantage_pre_scale_std": 0.1688544750213623,
|
|
"signal/advantage_std": 0.1688544750213623,
|
|
"signal/brier_reward/centered_abs_mean": 0.13363418728113174,
|
|
"signal/brier_reward/group_std_mean": 0.17583012878894805,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016704273410141468,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016704273410141468,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03758625835180283,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05853767320513725,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004698282293975353,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004698282293975353,
|
|
"signal/format_reward/centered_abs_mean": 0.017816840298473836,
|
|
"signal/format_reward/group_std_mean": 0.03516379445791244,
|
|
"signal/format_reward/group_zero_std_frac": 0.8472222208976745,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008908420149236918,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008908420149236918,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0064054221846163275,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.01180559191852808,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00011465705611044542,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00011465705611044542,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10133122354745865,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1425911009311676,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018138288287445903,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018138288287445903,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1010772556066513,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.14227744638919831,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018092827638611197,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018092827638611197,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09595019370317459,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13558797985315324,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017175084445625543,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017175084445625543,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.060691657662391665,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08569863438606262,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010863807052373885,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010863807052373885,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05632963702082634,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0726559266448021,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010083004366606475,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010083004366606475,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10133122354745865,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1425911009311676,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018138288287445903,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018138288287445903,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007126413751393557,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009845777973532676,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008908017189241946,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008908017189241946,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23844121848045688,
|
|
"calibration/batch_distribution_entropy": 0.8294230120411807,
|
|
"calibration/buffer_distribution_entropy": 0.8332834369273272,
|
|
"calibration/confidence_entropy": 0.44966823648576637,
|
|
"calibration/coverage@0%": 0.02151823058491345,
|
|
"calibration/coverage@1%": 0.02151823058491345,
|
|
"calibration/coverage@10%": 0.09493209050163852,
|
|
"calibration/coverage@15%": 0.27207408387812093,
|
|
"calibration/coverage@20%": 0.39152462093675044,
|
|
"calibration/coverage@25%": 0.5437544963331418,
|
|
"calibration/coverage@30%": 0.7778851174934724,
|
|
"calibration/coverage@5%": 0.026767574416934448,
|
|
"calibration/ece": 0.12294854977726558,
|
|
"calibration/mean_confidence": 0.7225680540206456,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01137152777777779,
|
|
"completions/max_length": 3828.4,
|
|
"completions/max_terminated_length": 3828.4,
|
|
"completions/mean_length": 812.3688354492188,
|
|
"completions/mean_terminated_length": 821.8790405273437,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 276.6,
|
|
"epoch": 0.34799565005437433,
|
|
"grad_norm": 0.00042106854380108416,
|
|
"learning_rate": 1.8975903614457832e-06,
|
|
"loss": -0.0099,
|
|
"num_tokens": 332645193.0,
|
|
"reward": 1.0644186973571776,
|
|
"reward_std": 0.1273445561528206,
|
|
"rewards/accuracy_reward": 0.6989583373069763,
|
|
"rewards/brier_reward": 0.8076271176338196,
|
|
"rewards/confidence_uniqueness_reward": 0.9371534705162048,
|
|
"rewards/format_reward": 0.9884548664093018,
|
|
"rewards/frontier_aurc_reward": -0.0034654760267585514,
|
|
"rewards/frontier_coverage_1": 0.010090233152732253,
|
|
"rewards/frontier_coverage_10": 0.010090233152732253,
|
|
"rewards/frontier_coverage_15": 0.010443723807111383,
|
|
"rewards/frontier_coverage_20": 0.020869026891887188,
|
|
"rewards/frontier_coverage_25": 0.06478311643004417,
|
|
"rewards/frontier_coverage_5": 0.010090233152732253,
|
|
"rewards/frontier_ece_reward": 0.003317009983584285,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14793836772441865,
|
|
"signal/accuracy_reward/group_std_mean": 0.1978321135044098,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.425,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07396918386220933,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07396918386220933,
|
|
"signal/advantage_abs_mean": 0.09218663424253463,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09218663424253463,
|
|
"signal/advantage_pre_scale_std": 0.16679736375808715,
|
|
"signal/advantage_std": 0.16679736375808715,
|
|
"signal/brier_reward/centered_abs_mean": 0.12936757355928422,
|
|
"signal/brier_reward/group_std_mean": 0.17085058093070984,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016170946694910527,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016170946694910527,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.039029645174741744,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06023879200220108,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004878705646842718,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004878705646842718,
|
|
"signal/format_reward/centered_abs_mean": 0.018603515345603228,
|
|
"signal/format_reward/group_std_mean": 0.03493107426911592,
|
|
"signal/format_reward/group_zero_std_frac": 0.8583333253860473,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009301757672801614,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009301757672801614,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.004749262239784002,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.008506011310964822,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.501178963342682e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.501178963342682e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10415904968976974,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1452164024114609,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018644470255821943,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018644470255821943,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10415904968976974,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1452164024114609,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018644470255821943,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018644470255821943,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10013787895441055,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.14012934863567353,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017924679443240166,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017924679443240166,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05490083321928978,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0765575885772705,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009827248635701836,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009827248635701836,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06228313967585564,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07965980023145676,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001114868139848113,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001114868139848113,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10415904968976974,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1452164024114609,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018644470255821943,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018644470255821943,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006574434693902731,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00883613433688879,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008218043367378414,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008218043367378414,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.213075250859452,
|
|
"calibration/batch_distribution_entropy": 0.8233432774907394,
|
|
"calibration/buffer_distribution_entropy": 0.8366827818336875,
|
|
"calibration/confidence_entropy": 0.4392960218291321,
|
|
"calibration/coverage@0%": 0.019067250061658745,
|
|
"calibration/coverage@1%": 0.019067250061658745,
|
|
"calibration/coverage@10%": 0.3197417803010817,
|
|
"calibration/coverage@15%": 0.4912859673812978,
|
|
"calibration/coverage@20%": 0.5355746522716303,
|
|
"calibration/coverage@25%": 0.58310620015022,
|
|
"calibration/coverage@30%": 0.6621681636503451,
|
|
"calibration/coverage@5%": 0.17917223693829917,
|
|
"calibration/ece": 0.14121752496570036,
|
|
"calibration/mean_confidence": 0.7174865376212494,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.012934027777777768,
|
|
"completions/max_length": 3621.2,
|
|
"completions/max_terminated_length": 3621.2,
|
|
"completions/mean_length": 881.6501831054687,
|
|
"completions/mean_terminated_length": 893.253125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 270.2,
|
|
"epoch": 0.3599955000562493,
|
|
"grad_norm": 0.0004654189106076956,
|
|
"learning_rate": 1.7469879518072292e-06,
|
|
"loss": -0.0109,
|
|
"num_tokens": 345912139.0,
|
|
"reward": 1.0574634552001954,
|
|
"reward_std": 0.13282708525657655,
|
|
"rewards/accuracy_reward": 0.6893229246139526,
|
|
"rewards/brier_reward": 0.798139750957489,
|
|
"rewards/confidence_uniqueness_reward": 0.9334550976753235,
|
|
"rewards/format_reward": 0.9870659708976746,
|
|
"rewards/frontier_aurc_reward": -0.004001729190349579,
|
|
"rewards/frontier_coverage_1": 0.010269207740202546,
|
|
"rewards/frontier_coverage_10": 0.010370114585384727,
|
|
"rewards/frontier_coverage_15": 0.012277117744088174,
|
|
"rewards/frontier_coverage_20": 0.02485618032515049,
|
|
"rewards/frontier_coverage_25": 0.07025006264448166,
|
|
"rewards/frontier_coverage_5": 0.010269207740202546,
|
|
"rewards/frontier_ece_reward": 0.0033263738732784986,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1543891042470932,
|
|
"signal/accuracy_reward/group_std_mean": 0.2028920382261276,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.41666666865348817,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0771945521235466,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0771945521235466,
|
|
"signal/advantage_abs_mean": 0.09776623845100403,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09776623845100403,
|
|
"signal/advantage_pre_scale_std": 0.17359991371631622,
|
|
"signal/advantage_std": 0.17359991371631622,
|
|
"signal/brier_reward/centered_abs_mean": 0.1349114805459976,
|
|
"signal/brier_reward/group_std_mean": 0.17627350091934205,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0168639350682497,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0168639350682497,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.039914284646511075,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.061895917356014254,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004989285580813884,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004989285580813884,
|
|
"signal/format_reward/centered_abs_mean": 0.02108832448720932,
|
|
"signal/format_reward/group_std_mean": 0.03968926072120667,
|
|
"signal/format_reward/group_zero_std_frac": 0.8333333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01054416224360466,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01054416224360466,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.005396629869937897,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.009891701303422451,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.659966890467331e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.659966890467331e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1010685533285141,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1450372576713562,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018091270700097083,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018091270700097083,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10048999190330506,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.14426515400409698,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017987707862630487,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017987707862630487,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0918548583984375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13288411647081375,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016442019026726485,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016442019026726485,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.053203088045120236,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07647181898355485,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009523351793177426,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009523351793177426,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06769980266690254,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0865550771355629,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012118263402953744,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012118263402953744,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1010685533285141,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1450372576713562,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018091270700097083,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018091270700097083,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006280233804136515,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.008365536015480757,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007850292255170644,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007850292255170644,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.3599955000562493,
|
|
"eval_calibration/aurc": 0.22214487372449288,
|
|
"eval_calibration/batch_distribution_entropy": 0.7755822830854329,
|
|
"eval_calibration/buffer_distribution_entropy": 0.8471814964949228,
|
|
"eval_calibration/confidence_entropy": 0.4570869870657699,
|
|
"eval_calibration/coverage@0%": 0.11139112903225806,
|
|
"eval_calibration/coverage@1%": 0.11139112903225806,
|
|
"eval_calibration/coverage@10%": 0.20278897849462366,
|
|
"eval_calibration/coverage@15%": 0.3587029569892473,
|
|
"eval_calibration/coverage@20%": 0.683635752688172,
|
|
"eval_calibration/coverage@25%": 0.7795698924731184,
|
|
"eval_calibration/coverage@30%": 0.9895833333333334,
|
|
"eval_calibration/coverage@5%": 0.11139112903225806,
|
|
"eval_calibration/ece": 0.19708409512610767,
|
|
"eval_calibration/mean_confidence": 0.7251063188085235,
|
|
"eval_completions/clipped_ratio": 0.006944444444444438,
|
|
"eval_completions/max_length": 2807.0,
|
|
"eval_completions/max_terminated_length": 2807.0,
|
|
"eval_completions/mean_length": 860.0338643391927,
|
|
"eval_completions/mean_terminated_length": 866.151621500651,
|
|
"eval_completions/min_length": 124.66666666666667,
|
|
"eval_completions/min_terminated_length": 340.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 345912139.0,
|
|
"eval_reward": 1.0506399472554524,
|
|
"eval_reward_std": 0.25993067771196365,
|
|
"eval_rewards/accuracy_reward": 0.6848958333333334,
|
|
"eval_rewards/brier_reward": 0.7940681974093119,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8835298220316569,
|
|
"eval_rewards/format_reward": 0.9921875,
|
|
"eval_rewards/frontier_aurc_reward": -0.00451858372737964,
|
|
"eval_rewards/frontier_coverage_1": 0.007746024794566135,
|
|
"eval_rewards/frontier_coverage_10": 0.0080074449069798,
|
|
"eval_rewards/frontier_coverage_15": 0.010937723660996804,
|
|
"eval_rewards/frontier_coverage_20": 0.02070824522525072,
|
|
"eval_rewards/frontier_coverage_25": 0.06115776486694813,
|
|
"eval_rewards/frontier_coverage_5": 0.007746024794566135,
|
|
"eval_rewards/frontier_ece_reward": 0.0031805916223675013,
|
|
"eval_runtime": 197.7756,
|
|
"eval_samples_per_second": 5.056,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4182400206724803,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4636932412783305,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20912001033624014,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20912001033624014,
|
|
"eval_signal/advantage_abs_mean": 0.22610543916622797,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.22610543916622797,
|
|
"eval_signal/advantage_pre_scale_std": 0.2583668604493141,
|
|
"eval_signal/advantage_std": 0.2583668604493141,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.21914813170830408,
|
|
"eval_signal/brier_reward/group_std_mean": 0.27482346693674725,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02739351646353801,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02739351646353801,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.055060590306917824,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08307173289358616,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006882573788364728,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006882573788364728,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.015028211598594984,
|
|
"eval_signal/format_reward/group_std_mean": 0.041204764818151794,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.7777778009573618,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.007514105799297492,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.007514105799297492,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.007855243321197728,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.01771801950720449,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0001406088534470958,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0001406088534470958,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.1449227419992288,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.24502811332543692,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002594117036399742,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002594117036399742,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.14424370601773262,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.24401270101467767,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002581962267868221,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002581962267868221,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.1283226286371549,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.2211132695277532,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002296974960093697,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002296974960093697,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.0656218013415734,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.11319748063882192,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011746301994814228,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011746301994814228,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.09421608969569206,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.11541344473759334,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016864680025416117,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016864680025416117,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.1449227419992288,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.24502811332543692,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002594117036399742,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002594117036399742,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.006373878801241517,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.00929814165768524,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007967348501551896,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007967348501551896,
|
|
"eval_steps_per_second": 0.03,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1394606443752609,
|
|
"calibration/batch_distribution_entropy": 0.7936142557094252,
|
|
"calibration/buffer_distribution_entropy": 0.845419702343172,
|
|
"calibration/confidence_entropy": 0.41882709613499725,
|
|
"calibration/coverage@0%": 0.017655267348676385,
|
|
"calibration/coverage@1%": 0.017655267348676385,
|
|
"calibration/coverage@10%": 0.5253642503070282,
|
|
"calibration/coverage@15%": 0.6576306287853386,
|
|
"calibration/coverage@20%": 0.7686752423003841,
|
|
"calibration/coverage@25%": 0.8230803931486064,
|
|
"calibration/coverage@30%": 0.9062827225130891,
|
|
"calibration/coverage@5%": 0.21733968716937588,
|
|
"calibration/ece": 0.1140321937468661,
|
|
"calibration/mean_confidence": 0.7414189853264833,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011979166666666674,
|
|
"completions/max_length": 3721.4,
|
|
"completions/max_terminated_length": 3721.4,
|
|
"completions/mean_length": 836.39609375,
|
|
"completions/mean_terminated_length": 846.6472778320312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 274.2,
|
|
"epoch": 0.3719953500581243,
|
|
"grad_norm": 0.00045802563545294106,
|
|
"learning_rate": 1.5963855421686747e-06,
|
|
"loss": -0.0107,
|
|
"num_tokens": 358655134.0,
|
|
"reward": 1.089371681213379,
|
|
"reward_std": 0.13748800307512282,
|
|
"rewards/accuracy_reward": 0.7427951455116272,
|
|
"rewards/brier_reward": 0.8266907215118409,
|
|
"rewards/confidence_uniqueness_reward": 0.9327721953392029,
|
|
"rewards/format_reward": 0.9880208253860474,
|
|
"rewards/frontier_aurc_reward": -0.0032799826469272373,
|
|
"rewards/frontier_coverage_1": 0.007008756510913372,
|
|
"rewards/frontier_coverage_10": 0.007702544890344143,
|
|
"rewards/frontier_coverage_15": 0.011821538442745805,
|
|
"rewards/frontier_coverage_20": 0.04276132583618164,
|
|
"rewards/frontier_coverage_25": 0.1319062441587448,
|
|
"rewards/frontier_coverage_5": 0.007008756510913372,
|
|
"rewards/frontier_ece_reward": 0.0029005682095885276,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16427408754825593,
|
|
"signal/accuracy_reward/group_std_mean": 0.21161983013153077,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08213704377412796,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08213704377412796,
|
|
"signal/advantage_abs_mean": 0.1013274610042572,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1013274610042572,
|
|
"signal/advantage_pre_scale_std": 0.1804036021232605,
|
|
"signal/advantage_std": 0.1804036021232605,
|
|
"signal/brier_reward/centered_abs_mean": 0.13167781829833985,
|
|
"signal/brier_reward/group_std_mean": 0.17299252152442932,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016459727287292482,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016459727287292482,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04091334193944931,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06404241994023323,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005114167742431164,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005114167742431164,
|
|
"signal/format_reward/centered_abs_mean": 0.02075737863779068,
|
|
"signal/format_reward/group_std_mean": 0.04021854251623154,
|
|
"signal/format_reward/group_zero_std_frac": 0.8305555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01037868931889534,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01037868931889534,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.004709955211728812,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.008472612965852023,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.430819725617767e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.430819725617767e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09957832992076873,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.14525699019432067,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017824520589783787,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017824520589783787,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09750574976205825,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1425594985485077,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017453528707847,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017453528707847,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07561995238065719,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11277424991130829,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013535971054807305,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013535971054807305,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.04978926405310631,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06711633205413818,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008912277640774846,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008912277640774846,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09789690375328064,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12379591763019562,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017523544374853373,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017523544374853373,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.09957832992076873,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.14525699019432067,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017824520589783787,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017824520589783787,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005051241349428892,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006803230196237564,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006314051686786115,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006314051686786115,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18785021343328706,
|
|
"calibration/batch_distribution_entropy": 0.6710961452816249,
|
|
"calibration/buffer_distribution_entropy": 0.8140405972224055,
|
|
"calibration/confidence_entropy": 0.35810035374049687,
|
|
"calibration/coverage@0%": 0.023060960861917328,
|
|
"calibration/coverage@1%": 0.023060960861917328,
|
|
"calibration/coverage@10%": 0.42201104881266494,
|
|
"calibration/coverage@15%": 0.5009179859278804,
|
|
"calibration/coverage@20%": 0.721875,
|
|
"calibration/coverage@25%": 0.790625,
|
|
"calibration/coverage@30%": 0.8,
|
|
"calibration/coverage@5%": 0.1405123131046614,
|
|
"calibration/ece": 0.12457323190466667,
|
|
"calibration/mean_confidence": 0.8050869567093308,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01362847222222221,
|
|
"completions/max_length": 3826.4,
|
|
"completions/max_terminated_length": 3826.4,
|
|
"completions/mean_length": 839.6751708984375,
|
|
"completions/mean_terminated_length": 851.2668823242187,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 271.2,
|
|
"epoch": 0.38399520005999926,
|
|
"grad_norm": 0.00044471403816714883,
|
|
"learning_rate": 1.4457831325301204e-06,
|
|
"loss": -0.0123,
|
|
"num_tokens": 371415488.0,
|
|
"reward": 1.048078441619873,
|
|
"reward_std": 0.13773128092288972,
|
|
"rewards/accuracy_reward": 0.6730902791023254,
|
|
"rewards/brier_reward": 0.7841872692108154,
|
|
"rewards/confidence_uniqueness_reward": 0.9249788880348205,
|
|
"rewards/format_reward": 0.9861979126930237,
|
|
"rewards/frontier_aurc_reward": -0.00579830389469862,
|
|
"rewards/frontier_coverage_1": 0.021006893925368787,
|
|
"rewards/frontier_coverage_10": 0.021663734689354897,
|
|
"rewards/frontier_coverage_15": 0.0223736809566617,
|
|
"rewards/frontier_coverage_20": 0.03696827031672001,
|
|
"rewards/frontier_coverage_25": 0.12879179567098617,
|
|
"rewards/frontier_coverage_5": 0.021024198178201912,
|
|
"rewards/frontier_ece_reward": 0.003077392978593707,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15486110746860504,
|
|
"signal/accuracy_reward/group_std_mean": 0.2035621464252472,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07743055373430252,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07743055373430252,
|
|
"signal/advantage_abs_mean": 0.10096549987792969,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10096549987792969,
|
|
"signal/advantage_pre_scale_std": 0.17911962866783143,
|
|
"signal/advantage_std": 0.17911962866783143,
|
|
"signal/brier_reward/centered_abs_mean": 0.14338811337947846,
|
|
"signal/brier_reward/group_std_mean": 0.18627047538757324,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017923514172434808,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017923514172434808,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04587777331471443,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07106368690729141,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005734721664339304,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005734721664339304,
|
|
"signal/format_reward/centered_abs_mean": 0.02301974855363369,
|
|
"signal/format_reward/group_std_mean": 0.04283802658319473,
|
|
"signal/format_reward/group_zero_std_frac": 0.825,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011509874276816845,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.011509874276816845,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.007479064725339412,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.013051173277199268,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0001338752525043674,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0001338752525043674,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09618723690509796,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.13724965155124663,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017217515734955669,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017217515734955669,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09364039450883865,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1338651016354561,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016761629842221737,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016761629842221737,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0792768731713295,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11451553106307984,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001419055974110961,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001419055974110961,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.050817693769931796,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06823742240667344,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009096366818994283,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009096366818994283,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1097530648112297,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14021052420139313,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019645798252895474,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019645798252895474,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.09607678651809692,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1371078222990036,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017197745153680443,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017197745153680443,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005614162608981133,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0075827624648809435,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007017703261226416,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007017703261226416,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21941487360333833,
|
|
"calibration/batch_distribution_entropy": 0.7714647539506028,
|
|
"calibration/buffer_distribution_entropy": 0.7886613300284808,
|
|
"calibration/confidence_entropy": 0.3824705529750695,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.1962694417442022,
|
|
"calibration/coverage@15%": 0.4322914568187518,
|
|
"calibration/coverage@20%": 0.5052041737994015,
|
|
"calibration/coverage@25%": 0.5948411738398793,
|
|
"calibration/coverage@30%": 0.7651094471992914,
|
|
"calibration/coverage@5%": 0.1105263157894737,
|
|
"calibration/ece": 0.13094677855056783,
|
|
"calibration/mean_confidence": 0.732904433164731,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01909722222222221,
|
|
"completions/max_length": 3621.4,
|
|
"completions/max_terminated_length": 3621.4,
|
|
"completions/mean_length": 862.18828125,
|
|
"completions/mean_terminated_length": 879.2411743164063,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 278.4,
|
|
"epoch": 0.39599505006187424,
|
|
"grad_norm": 0.000504399067722261,
|
|
"learning_rate": 1.2951807228915664e-06,
|
|
"loss": -0.0156,
|
|
"num_tokens": 384486969.0,
|
|
"reward": 1.0396140456199645,
|
|
"reward_std": 0.13989297300577164,
|
|
"rewards/accuracy_reward": 0.6612847208976745,
|
|
"rewards/brier_reward": 0.7844983577728272,
|
|
"rewards/confidence_uniqueness_reward": 0.919456148147583,
|
|
"rewards/format_reward": 0.9807291746139526,
|
|
"rewards/frontier_aurc_reward": -0.004816135391592979,
|
|
"rewards/frontier_coverage_1": 0.03149934858083725,
|
|
"rewards/frontier_coverage_10": 0.03149934858083725,
|
|
"rewards/frontier_coverage_15": 0.029880692809820177,
|
|
"rewards/frontier_coverage_20": 0.03761248253285885,
|
|
"rewards/frontier_coverage_25": 0.1226318396627903,
|
|
"rewards/frontier_coverage_5": 0.03149934858083725,
|
|
"rewards/frontier_ece_reward": 0.004833784187212586,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15111762285232544,
|
|
"signal/accuracy_reward/group_std_mean": 0.2020161658525467,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4083333432674408,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07555881142616272,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07555881142616272,
|
|
"signal/advantage_abs_mean": 0.10153342485427856,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10153342485427856,
|
|
"signal/advantage_pre_scale_std": 0.181088188290596,
|
|
"signal/advantage_std": 0.181088188290596,
|
|
"signal/brier_reward/centered_abs_mean": 0.14908890426158905,
|
|
"signal/brier_reward/group_std_mean": 0.1965240716934204,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01863611303269863,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01863611303269863,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.051202216744422914,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07755682170391083,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006400277093052864,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006400277093052864,
|
|
"signal/format_reward/centered_abs_mean": 0.02900390625,
|
|
"signal/format_reward/group_std_mean": 0.0507274828851223,
|
|
"signal/format_reward/group_zero_std_frac": 0.8027777791023254,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014501953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.014501953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.006489654909819365,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.011499019339680672,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0001161648178822361,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0001161648178822361,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10610861033201217,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15249101519584657,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001899344054982066,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001899344054982066,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10610861033201217,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15249101519584657,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001899344054982066,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001899344054982066,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09463098794221877,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13721374273300171,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016938945977017284,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016938945977017284,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06229153722524643,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08806440830230713,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011150184785947205,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011150184785947205,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09881225526332856,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12897567003965377,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017687393119558692,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017687393119558692,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10610861033201217,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15249101519584657,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001899344054982066,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001899344054982066,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007002682704478502,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009100762195885181,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008753353380598128,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008753353380598128,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.16166878537469914,
|
|
"calibration/batch_distribution_entropy": 0.7354266691642397,
|
|
"calibration/buffer_distribution_entropy": 0.7575519463431387,
|
|
"calibration/confidence_entropy": 0.37874483129131903,
|
|
"calibration/coverage@0%": 0.02202937983537398,
|
|
"calibration/coverage@1%": 0.02202937983537398,
|
|
"calibration/coverage@10%": 0.19157356639320605,
|
|
"calibration/coverage@15%": 0.6136701427359228,
|
|
"calibration/coverage@20%": 0.7577182526254914,
|
|
"calibration/coverage@25%": 0.8434017263336635,
|
|
"calibration/coverage@30%": 0.9558143483797934,
|
|
"calibration/coverage@5%": 0.06191477374228745,
|
|
"calibration/ece": 0.11013321716453353,
|
|
"calibration/mean_confidence": 0.7809981872862709,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.016840277777777746,
|
|
"completions/max_length": 3700.0,
|
|
"completions/max_terminated_length": 3700.0,
|
|
"completions/mean_length": 844.8034912109375,
|
|
"completions/mean_terminated_length": 859.2811767578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 288.2,
|
|
"epoch": 0.4079949000637492,
|
|
"grad_norm": 0.0004786611534655094,
|
|
"learning_rate": 1.1445783132530121e-06,
|
|
"loss": -0.0159,
|
|
"num_tokens": 397308289.0,
|
|
"reward": 1.0684638977050782,
|
|
"reward_std": 0.1363822802901268,
|
|
"rewards/accuracy_reward": 0.7136284589767456,
|
|
"rewards/brier_reward": 0.8065288186073303,
|
|
"rewards/confidence_uniqueness_reward": 0.9220426321029663,
|
|
"rewards/format_reward": 0.9829861044883728,
|
|
"rewards/frontier_aurc_reward": -0.0036202599760144947,
|
|
"rewards/frontier_coverage_1": 0.012478399742394685,
|
|
"rewards/frontier_coverage_10": 0.012478399742394685,
|
|
"rewards/frontier_coverage_15": 0.013967123441398143,
|
|
"rewards/frontier_coverage_20": 0.030986898019909857,
|
|
"rewards/frontier_coverage_25": 0.10951882898807526,
|
|
"rewards/frontier_coverage_5": 0.012478399742394685,
|
|
"rewards/frontier_ece_reward": 0.005718720983713866,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1496690556406975,
|
|
"signal/accuracy_reward/group_std_mean": 0.20075952112674714,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4194444537162781,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07483452782034875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07483452782034875,
|
|
"signal/advantage_abs_mean": 0.09796086251735688,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09796086251735688,
|
|
"signal/advantage_pre_scale_std": 0.17858160734176637,
|
|
"signal/advantage_std": 0.17858160734176637,
|
|
"signal/brier_reward/centered_abs_mean": 0.14328038990497588,
|
|
"signal/brier_reward/group_std_mean": 0.18521734774112703,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017910048738121985,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017910048738121985,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05042630434036255,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07379210442304611,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006303288042545319,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006303288042545319,
|
|
"signal/format_reward/centered_abs_mean": 0.02784288227558136,
|
|
"signal/format_reward/group_std_mean": 0.04672937579452992,
|
|
"signal/format_reward/group_zero_std_frac": 0.8277777791023254,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01392144113779068,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01392144113779068,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.005079053156077862,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.009023398347198962,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.091504471143707e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.091504471143707e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10737672746181488,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1529840499162674,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019220433663576842,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019220433663576842,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10737672746181488,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1529840499162674,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019220433663576842,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019220433663576842,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09843996018171311,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.14119636714458467,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017620753031224012,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017620753031224012,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06077901348471641,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08596007078886032,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010879443609155715,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010879443609155715,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08554520159959793,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10874855667352676,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015312590170651675,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015312590170651675,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10737672746181488,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1529840499162674,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019220433663576842,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019220433663576842,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007207877747714519,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0095659539103508,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009009847184643149,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009009847184643149,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.12835975236328778,
|
|
"calibration/batch_distribution_entropy": 0.821059537069926,
|
|
"calibration/buffer_distribution_entropy": 0.7583996288756972,
|
|
"calibration/confidence_entropy": 0.4081814972315116,
|
|
"calibration/coverage@0%": 0.00862533692722372,
|
|
"calibration/coverage@1%": 0.00862533692722372,
|
|
"calibration/coverage@10%": 0.33159990384193466,
|
|
"calibration/coverage@15%": 0.7465159898081857,
|
|
"calibration/coverage@20%": 0.8805055999149627,
|
|
"calibration/coverage@25%": 0.9511387978142076,
|
|
"calibration/coverage@30%": 0.9962666666666667,
|
|
"calibration/coverage@5%": 0.21060197663971247,
|
|
"calibration/ece": 0.084675956737041,
|
|
"calibration/mean_confidence": 0.7221457613282724,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01944444444444444,
|
|
"completions/max_length": 3881.0,
|
|
"completions/max_terminated_length": 3881.0,
|
|
"completions/mean_length": 882.5856811523438,
|
|
"completions/mean_terminated_length": 900.1067504882812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 265.2,
|
|
"epoch": 0.4199947500656242,
|
|
"grad_norm": 0.0004372715193312615,
|
|
"learning_rate": 9.93975903614458e-07,
|
|
"loss": -0.0184,
|
|
"num_tokens": 410583644.0,
|
|
"reward": 1.0630595207214355,
|
|
"reward_std": 0.1402587652206421,
|
|
"rewards/accuracy_reward": 0.7052951455116272,
|
|
"rewards/brier_reward": 0.801636004447937,
|
|
"rewards/confidence_uniqueness_reward": 0.9212079167366027,
|
|
"rewards/format_reward": 0.9804687619209289,
|
|
"rewards/frontier_aurc_reward": -0.003202560031786561,
|
|
"rewards/frontier_coverage_1": 0.009410932660102844,
|
|
"rewards/frontier_coverage_10": 0.00938644101843238,
|
|
"rewards/frontier_coverage_15": 0.011940532876178623,
|
|
"rewards/frontier_coverage_20": 0.04426906034350395,
|
|
"rewards/frontier_coverage_25": 0.15708873867988588,
|
|
"rewards/frontier_coverage_5": 0.009410932660102844,
|
|
"rewards/frontier_ece_reward": 0.0044516745489090685,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15396592915058135,
|
|
"signal/accuracy_reward/group_std_mean": 0.20630868673324584,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07698296457529068,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07698296457529068,
|
|
"signal/advantage_abs_mean": 0.10177666842937469,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10177666842937469,
|
|
"signal/advantage_pre_scale_std": 0.18463847935199737,
|
|
"signal/advantage_std": 0.18463847935199737,
|
|
"signal/brier_reward/centered_abs_mean": 0.1451348751783371,
|
|
"signal/brier_reward/group_std_mean": 0.1891576647758484,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018141859397292136,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018141859397292136,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05311574935913086,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07745091170072556,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006639468669891358,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006639468669891358,
|
|
"signal/format_reward/centered_abs_mean": 0.03104926161468029,
|
|
"signal/format_reward/group_std_mean": 0.05074087902903557,
|
|
"signal/format_reward/group_zero_std_frac": 0.8166666746139526,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.015524630807340145,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.015524630807340145,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.004582889564335346,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.008765554707497358,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.203371689887717e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.203371689887717e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11115374714136124,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15887077152729034,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001989651983603835,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001989651983603835,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11011287569999695,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15743386447429658,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001971020409837365,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001971020409837365,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09279258996248245,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13348000943660737,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001660987362265587,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001660987362265587,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06362280994653702,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08751891702413558,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011388482293114067,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011388482293114067,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10877040922641754,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13969840705394745,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019469902152195572,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019469902152195572,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11115374714136124,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15887077152729034,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001989651983603835,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001989651983603835,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007770705409348011,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010677088052034378,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009713381761685014,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009713381761685014,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.11867554556626558,
|
|
"calibration/batch_distribution_entropy": 0.7719426589599478,
|
|
"calibration/buffer_distribution_entropy": 0.7868053149641373,
|
|
"calibration/confidence_entropy": 0.3982474085000685,
|
|
"calibration/coverage@0%": 0.04073107049608355,
|
|
"calibration/coverage@1%": 0.04073107049608355,
|
|
"calibration/coverage@10%": 0.42235769120170313,
|
|
"calibration/coverage@15%": 0.7863377057034178,
|
|
"calibration/coverage@20%": 0.9337981654542539,
|
|
"calibration/coverage@25%": 0.9724137931034482,
|
|
"calibration/coverage@30%": 0.9946949602122016,
|
|
"calibration/coverage@5%": 0.11290462955164378,
|
|
"calibration/ece": 0.0809440485719725,
|
|
"calibration/mean_confidence": 0.7680337505150759,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02213541666666665,
|
|
"completions/max_length": 3965.0,
|
|
"completions/max_terminated_length": 3965.0,
|
|
"completions/mean_length": 863.0920166015625,
|
|
"completions/mean_terminated_length": 882.6232299804688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 304.4,
|
|
"epoch": 0.4319946000674992,
|
|
"grad_norm": 0.00038124839193187654,
|
|
"learning_rate": 8.433734939759036e-07,
|
|
"loss": -0.0194,
|
|
"num_tokens": 423626432.0,
|
|
"reward": 1.0619913816452027,
|
|
"reward_std": 0.1390293389558792,
|
|
"rewards/accuracy_reward": 0.7075520753860474,
|
|
"rewards/brier_reward": 0.7912116408348083,
|
|
"rewards/confidence_uniqueness_reward": 0.9133248925209045,
|
|
"rewards/format_reward": 0.9776909589767456,
|
|
"rewards/frontier_aurc_reward": -0.004389631748199463,
|
|
"rewards/frontier_coverage_1": 0.007734634727239609,
|
|
"rewards/frontier_coverage_10": 0.008618967141956091,
|
|
"rewards/frontier_coverage_15": 0.021352371852844953,
|
|
"rewards/frontier_coverage_20": 0.06736778169870376,
|
|
"rewards/frontier_coverage_25": 0.21702924072742463,
|
|
"rewards/frontier_coverage_5": 0.007734634727239609,
|
|
"rewards/frontier_ece_reward": 0.003818414593115449,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14320203959941863,
|
|
"signal/accuracy_reward/group_std_mean": 0.19457052350044252,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4250000059604645,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07160101979970931,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07160101979970931,
|
|
"signal/advantage_abs_mean": 0.09958047866821289,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09958047866821289,
|
|
"signal/advantage_pre_scale_std": 0.18535009622573853,
|
|
"signal/advantage_std": 0.18535009622573853,
|
|
"signal/brier_reward/centered_abs_mean": 0.14485330879688263,
|
|
"signal/brier_reward/group_std_mean": 0.19100603461265564,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01810666359961033,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01810666359961033,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05692942887544632,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0827468365430832,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00711617860943079,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00711617860943079,
|
|
"signal/format_reward/centered_abs_mean": 0.03310004323720932,
|
|
"signal/format_reward/group_std_mean": 0.05456235036253929,
|
|
"signal/format_reward/group_zero_std_frac": 0.7972222208976746,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01655002161860466,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01655002161860466,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.005390328448265791,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.009165607579052448,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.648687555454671e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.648687555454671e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1076874241232872,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15496502816677094,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019276048755273224,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019276048755273224,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.104564568400383,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15102957487106322,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018717057770118116,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018717057770118116,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07957575172185898,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11534264981746674,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014244059333577753,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014244059333577753,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07137952968478203,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09536249935626984,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012776935007423162,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012776935007423162,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.14418553411960602,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.18653603494167328,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025809210259467364,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025809210259467364,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1076874241232872,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15496502816677094,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019276048755273224,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019276048755273224,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008868717961013317,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012341622821986675,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011085897451266647,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011085897451266647,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.17966311621561074,
|
|
"calibration/batch_distribution_entropy": 0.7566981674049396,
|
|
"calibration/buffer_distribution_entropy": 0.7915198321155537,
|
|
"calibration/confidence_entropy": 0.39772641085626637,
|
|
"calibration/coverage@0%": 0.013611198447090345,
|
|
"calibration/coverage@1%": 0.013611198447090345,
|
|
"calibration/coverage@10%": 0.085222752313644,
|
|
"calibration/coverage@15%": 0.34522302571323116,
|
|
"calibration/coverage@20%": 0.8419730062027713,
|
|
"calibration/coverage@25%": 0.9279373368146213,
|
|
"calibration/coverage@30%": 0.9514360313315926,
|
|
"calibration/coverage@5%": 0.035600727242901864,
|
|
"calibration/ece": 0.12424600098394474,
|
|
"calibration/mean_confidence": 0.7696269697210354,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01918402777777777,
|
|
"completions/max_length": 3683.6,
|
|
"completions/max_terminated_length": 3683.6,
|
|
"completions/mean_length": 860.390283203125,
|
|
"completions/mean_terminated_length": 877.1977905273437,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 264.0,
|
|
"epoch": 0.44399445006937416,
|
|
"grad_norm": 0.00042491775820963085,
|
|
"learning_rate": 6.927710843373495e-07,
|
|
"loss": -0.0157,
|
|
"num_tokens": 436628176.0,
|
|
"reward": 1.0528750896453858,
|
|
"reward_std": 0.14500550627708436,
|
|
"rewards/accuracy_reward": 0.6854166746139526,
|
|
"rewards/brier_reward": 0.7892068386077881,
|
|
"rewards/confidence_uniqueness_reward": 0.9187054872512818,
|
|
"rewards/format_reward": 0.9807291626930237,
|
|
"rewards/frontier_aurc_reward": -0.0036725443322211506,
|
|
"rewards/frontier_coverage_1": 0.01439770613797009,
|
|
"rewards/frontier_coverage_10": 0.016149942949414253,
|
|
"rewards/frontier_coverage_15": 0.026541436091065407,
|
|
"rewards/frontier_coverage_20": 0.07000061422586441,
|
|
"rewards/frontier_coverage_25": 0.18287851214408873,
|
|
"rewards/frontier_coverage_5": 0.01439770613797009,
|
|
"rewards/frontier_ece_reward": 0.004581967741250992,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15631510615348815,
|
|
"signal/accuracy_reward/group_std_mean": 0.2074061244726181,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4000000059604645,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07815755307674407,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07815755307674407,
|
|
"signal/advantage_abs_mean": 0.10565940588712693,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10565940588712693,
|
|
"signal/advantage_pre_scale_std": 0.18545168936252593,
|
|
"signal/advantage_std": 0.18545168936252593,
|
|
"signal/brier_reward/centered_abs_mean": 0.14972881078720093,
|
|
"signal/brier_reward/group_std_mean": 0.1930097758769989,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018716101348400117,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018716101348400117,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05400483831763268,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0815995305776596,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006750604789704085,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006750604789704085,
|
|
"signal/format_reward/centered_abs_mean": 0.03107638955116272,
|
|
"signal/format_reward/group_std_mean": 0.05420016869902611,
|
|
"signal/format_reward/group_zero_std_frac": 0.7916666746139527,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01553819477558136,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01553819477558136,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.004669300001114607,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00808509076014161,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.358046834473498e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.358046834473498e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11330873966217041,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1614879548549652,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020282263401895763,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020282263401895763,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10939195156097412,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15617263913154603,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019581159343943,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019581159343943,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09606794416904449,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1361277371644974,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017196161206811666,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017196161206811666,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08031494021415711,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10763168931007386,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014376373728737235,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014376373728737235,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13603066951036452,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1738019824028015,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002434948831796646,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002434948831796646,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11330873966217041,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1614879548549652,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020282263401895763,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020282263401895763,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009635485522449017,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.013568679615855218,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012044356903061272,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012044356903061272,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20223318880699964,
|
|
"calibration/batch_distribution_entropy": 0.7491589023246843,
|
|
"calibration/buffer_distribution_entropy": 0.7953545225235228,
|
|
"calibration/confidence_entropy": 0.38962672073018173,
|
|
"calibration/coverage@0%": 0.015676818892647274,
|
|
"calibration/coverage@1%": 0.015676818892647274,
|
|
"calibration/coverage@10%": 0.20546013155266002,
|
|
"calibration/coverage@15%": 0.3649979169559784,
|
|
"calibration/coverage@20%": 0.5088098875156228,
|
|
"calibration/coverage@25%": 0.8386324237374438,
|
|
"calibration/coverage@30%": 0.9468667546174142,
|
|
"calibration/coverage@5%": 0.056930082600219076,
|
|
"calibration/ece": 0.11801808575784636,
|
|
"calibration/mean_confidence": 0.7763912756629218,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014930555555555558,
|
|
"completions/max_length": 3731.2,
|
|
"completions/max_terminated_length": 3731.2,
|
|
"completions/mean_length": 864.9324829101563,
|
|
"completions/mean_terminated_length": 878.110986328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 293.6,
|
|
"epoch": 0.45599430007124914,
|
|
"grad_norm": 0.00039803129038773477,
|
|
"learning_rate": 5.421686746987952e-07,
|
|
"loss": -0.0133,
|
|
"num_tokens": 449675142.0,
|
|
"reward": 1.0776456117630004,
|
|
"reward_std": 0.1367618814110756,
|
|
"rewards/accuracy_reward": 0.7268229246139526,
|
|
"rewards/brier_reward": 0.8068367838859558,
|
|
"rewards/confidence_uniqueness_reward": 0.9222909212112427,
|
|
"rewards/format_reward": 0.9850694537162781,
|
|
"rewards/frontier_aurc_reward": -0.003724863426759839,
|
|
"rewards/frontier_coverage_1": 0.002979634841904044,
|
|
"rewards/frontier_coverage_10": 0.0035558654461055996,
|
|
"rewards/frontier_coverage_15": 0.009191408194601536,
|
|
"rewards/frontier_coverage_20": 0.052768574655056,
|
|
"rewards/frontier_coverage_25": 0.2101300299167633,
|
|
"rewards/frontier_coverage_5": 0.002979634841904044,
|
|
"rewards/frontier_ece_reward": 0.004675363376736641,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15314127504825592,
|
|
"signal/accuracy_reward/group_std_mean": 0.20621364712715148,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3944444417953491,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07657063752412796,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07657063752412796,
|
|
"signal/advantage_abs_mean": 0.09666193425655364,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09666193425655364,
|
|
"signal/advantage_pre_scale_std": 0.17675647437572478,
|
|
"signal/advantage_std": 0.17675647437572478,
|
|
"signal/brier_reward/centered_abs_mean": 0.13968413770198823,
|
|
"signal/brier_reward/group_std_mean": 0.1854879915714264,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01746051721274853,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01746051721274853,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04815400689840317,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07337165027856826,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006019250862300396,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006019250862300396,
|
|
"signal/format_reward/centered_abs_mean": 0.02486979179084301,
|
|
"signal/format_reward/group_std_mean": 0.04627573862671852,
|
|
"signal/format_reward/group_zero_std_frac": 0.8083333253860474,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012434895895421506,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012434895895421506,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.004705563187599182,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.008424987457692623,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.422957907896488e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.422957907896488e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11363547295331955,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16121746897697448,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020340749295428397,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020340749295428397,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11250363886356354,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15969001799821853,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020138150779530407,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020138150779530407,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09940378814935684,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1407702460885048,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017793278209865093,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017793278209865093,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07035658955574035,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09374350309371948,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012593829305842519,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012593829305842519,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1395350754261017,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1781696170568466,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024976777844130994,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024976777844130994,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11363547295331955,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16121746897697448,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020340749295428397,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020340749295428397,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009676532447338104,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.013482803851366043,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001209566555917263,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001209566555917263,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18695428884355583,
|
|
"calibration/batch_distribution_entropy": 0.8294959515627778,
|
|
"calibration/buffer_distribution_entropy": 0.7830615575341693,
|
|
"calibration/confidence_entropy": 0.4127994379638137,
|
|
"calibration/coverage@0%": 0.030000602464743547,
|
|
"calibration/coverage@1%": 0.030000602464743547,
|
|
"calibration/coverage@10%": 0.3726668073790661,
|
|
"calibration/coverage@15%": 0.47507091504284826,
|
|
"calibration/coverage@20%": 0.6033368934355202,
|
|
"calibration/coverage@25%": 0.6390882147110964,
|
|
"calibration/coverage@30%": 0.7727477070519446,
|
|
"calibration/coverage@5%": 0.14934237172733683,
|
|
"calibration/ece": 0.12225915444458749,
|
|
"calibration/mean_confidence": 0.7202413289371199,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02230902777777779,
|
|
"completions/max_length": 3692.6,
|
|
"completions/max_terminated_length": 3692.6,
|
|
"completions/mean_length": 885.7539184570312,
|
|
"completions/mean_terminated_length": 906.2072998046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 246.8,
|
|
"epoch": 0.46799415007312406,
|
|
"grad_norm": 0.0003987095842603594,
|
|
"learning_rate": 3.91566265060241e-07,
|
|
"loss": -0.019,
|
|
"num_tokens": 462959891.0,
|
|
"reward": 1.0415829181671143,
|
|
"reward_std": 0.14480677247047424,
|
|
"rewards/accuracy_reward": 0.6694444417953491,
|
|
"rewards/brier_reward": 0.7821706771850586,
|
|
"rewards/confidence_uniqueness_reward": 0.9169838309288025,
|
|
"rewards/format_reward": 0.977430546283722,
|
|
"rewards/frontier_aurc_reward": -0.003745084721595049,
|
|
"rewards/frontier_coverage_1": 0.01643837634474039,
|
|
"rewards/frontier_coverage_10": 0.01651232047006488,
|
|
"rewards/frontier_coverage_15": 0.019644578453153372,
|
|
"rewards/frontier_coverage_20": 0.05048965364694595,
|
|
"rewards/frontier_coverage_25": 0.17186331748962402,
|
|
"rewards/frontier_coverage_5": 0.01643837634474039,
|
|
"rewards/frontier_ece_reward": 0.004818407958373428,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15354817509651184,
|
|
"signal/accuracy_reward/group_std_mean": 0.20701175928115845,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3944444417953491,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07677408754825592,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07677408754825592,
|
|
"signal/advantage_abs_mean": 0.10590840876102448,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10590840876102448,
|
|
"signal/advantage_pre_scale_std": 0.1864941358566284,
|
|
"signal/advantage_std": 0.1864941358566284,
|
|
"signal/brier_reward/centered_abs_mean": 0.15084939301013947,
|
|
"signal/brier_reward/group_std_mean": 0.1953912854194641,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018856174126267434,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018856174126267434,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05446743220090866,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07849968373775482,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006808429025113583,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006808429025113583,
|
|
"signal/format_reward/centered_abs_mean": 0.03295355886220932,
|
|
"signal/format_reward/group_std_mean": 0.05323704555630684,
|
|
"signal/format_reward/group_zero_std_frac": 0.8083333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01647677943110466,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01647677943110466,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.004789613839238882,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.008727262448519468,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.57340870425105e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.57340870425105e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10987022221088409,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16090194880962372,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019666770240291953,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019666770240291953,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10942451506853104,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16031207293272018,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019586987793445585,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019586987793445585,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09429361820220947,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13933112919330598,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001687855739146471,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001687855739146471,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06689911112189292,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09313002228736877,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001197494030930102,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001197494030930102,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12959368526935577,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1694766938686371,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00231972704641521,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00231972704641521,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10987022221088409,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16090194880962372,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019666770240291953,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019666770240291953,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009920213930308818,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.014252552576363087,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012400267412886023,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012400267412886023,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.16469432851019974,
|
|
"calibration/batch_distribution_entropy": 0.7597396959589091,
|
|
"calibration/buffer_distribution_entropy": 0.8007669131143469,
|
|
"calibration/confidence_entropy": 0.3969919203304937,
|
|
"calibration/coverage@0%": 0.024210526315789474,
|
|
"calibration/coverage@1%": 0.024210526315789474,
|
|
"calibration/coverage@10%": 0.30953425634416254,
|
|
"calibration/coverage@15%": 0.39944743749136624,
|
|
"calibration/coverage@20%": 0.8420339912280703,
|
|
"calibration/coverage@25%": 0.9285635964912281,
|
|
"calibration/coverage@30%": 0.9563157894736843,
|
|
"calibration/coverage@5%": 0.04684210526315789,
|
|
"calibration/ece": 0.12112572362046492,
|
|
"calibration/mean_confidence": 0.7741498976535474,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014496527777777768,
|
|
"completions/max_length": 3721.0,
|
|
"completions/max_terminated_length": 3721.0,
|
|
"completions/mean_length": 864.76953125,
|
|
"completions/mean_terminated_length": 877.4760375976563,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 265.4,
|
|
"epoch": 0.47999400007499904,
|
|
"grad_norm": 0.00045119517017155886,
|
|
"learning_rate": 2.409638554216868e-07,
|
|
"loss": -0.0136,
|
|
"num_tokens": 475989844.0,
|
|
"reward": 1.0597358465194702,
|
|
"reward_std": 0.1393290489912033,
|
|
"rewards/accuracy_reward": 0.6921006798744201,
|
|
"rewards/brier_reward": 0.7960708856582641,
|
|
"rewards/confidence_uniqueness_reward": 0.9244546294212341,
|
|
"rewards/format_reward": 0.9854166746139527,
|
|
"rewards/frontier_aurc_reward": -0.004364499310031533,
|
|
"rewards/frontier_coverage_1": 0.016046756226569415,
|
|
"rewards/frontier_coverage_10": 0.016046756226569415,
|
|
"rewards/frontier_coverage_15": 0.01778736140113324,
|
|
"rewards/frontier_coverage_20": 0.04925883784890175,
|
|
"rewards/frontier_coverage_25": 0.1840776115655899,
|
|
"rewards/frontier_coverage_5": 0.016046756226569415,
|
|
"rewards/frontier_ece_reward": 0.005061939358711243,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15125325322151184,
|
|
"signal/accuracy_reward/group_std_mean": 0.2005963295698166,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07562662661075592,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07562662661075592,
|
|
"signal/advantage_abs_mean": 0.10048190951347351,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10048190951347351,
|
|
"signal/advantage_pre_scale_std": 0.17941418886184693,
|
|
"signal/advantage_std": 0.17941418886184693,
|
|
"signal/brier_reward/centered_abs_mean": 0.14328907430171967,
|
|
"signal/brier_reward/group_std_mean": 0.18907705545425416,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017911134287714958,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017911134287714958,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04496906101703644,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07179959863424301,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005621132627129555,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005621132627129555,
|
|
"signal/format_reward/centered_abs_mean": 0.02472873292863369,
|
|
"signal/format_reward/group_std_mean": 0.04880723804235458,
|
|
"signal/format_reward/group_zero_std_frac": 0.794444453716278,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012364366464316845,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012364366464316845,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.005653998162597418,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.010000771470367908,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00010120656515937299,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00010120656515937299,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10320640355348587,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15092136859893798,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018473944626748561,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018473944626748561,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10320640355348587,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15092136859893798,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018473944626748561,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018473944626748561,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08790723532438278,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1300230875611305,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001573539455421269,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001573539455421269,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07248112261295318,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09901486486196517,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012974119978025555,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012974119978025555,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13645926415920256,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.17553613483905792,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00244262064807117,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00244262064807117,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10320640355348587,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15092136859893798,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018473944626748561,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018473944626748561,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009132616408169269,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.013096104748547078,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011415770510211586,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011415770510211586,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.47999400007499904,
|
|
"eval_calibration/aurc": 0.1530627466072467,
|
|
"eval_calibration/batch_distribution_entropy": 0.7084686843773372,
|
|
"eval_calibration/buffer_distribution_entropy": 0.8127786012531732,
|
|
"eval_calibration/confidence_entropy": 0.3849499401854783,
|
|
"eval_calibration/coverage@0%": 0.15826612903225806,
|
|
"eval_calibration/coverage@1%": 0.15826612903225806,
|
|
"eval_calibration/coverage@10%": 0.42002688172043007,
|
|
"eval_calibration/coverage@15%": 0.6705309139784946,
|
|
"eval_calibration/coverage@20%": 0.9114583333333334,
|
|
"eval_calibration/coverage@25%": 0.9479166666666666,
|
|
"eval_calibration/coverage@30%": 0.96875,
|
|
"eval_calibration/coverage@5%": 0.15826612903225806,
|
|
"eval_calibration/ece": 0.18083638855035544,
|
|
"eval_calibration/mean_confidence": 0.7608088995727863,
|
|
"eval_completions/clipped_ratio": 0.018229166666666668,
|
|
"eval_completions/max_length": 3308.1666666666665,
|
|
"eval_completions/max_terminated_length": 3308.1666666666665,
|
|
"eval_completions/mean_length": 879.684560139974,
|
|
"eval_completions/mean_terminated_length": 895.6973673502604,
|
|
"eval_completions/min_length": 65.5,
|
|
"eval_completions/min_terminated_length": 314.1666666666667,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 475989844.0,
|
|
"eval_reward": 1.0420524875322978,
|
|
"eval_reward_std": 0.2861000994841258,
|
|
"eval_rewards/accuracy_reward": 0.683159718910853,
|
|
"eval_rewards/brier_reward": 0.7857048312822977,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8660669028759003,
|
|
"eval_rewards/format_reward": 0.9782986144224802,
|
|
"eval_rewards/frontier_aurc_reward": -0.00444137768742318,
|
|
"eval_rewards/frontier_coverage_1": 0.012749733519740403,
|
|
"eval_rewards/frontier_coverage_10": 0.012749733519740403,
|
|
"eval_rewards/frontier_coverage_15": 0.01225113959905381,
|
|
"eval_rewards/frontier_coverage_20": 0.0417734415580829,
|
|
"eval_rewards/frontier_coverage_25": 0.1467849425971508,
|
|
"eval_rewards/frontier_coverage_5": 0.012749733519740403,
|
|
"eval_rewards/frontier_ece_reward": 0.005217449079888563,
|
|
"eval_runtime": 218.0299,
|
|
"eval_samples_per_second": 4.587,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.42138671875,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4653974175453186,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.210693359375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.210693359375,
|
|
"eval_signal/advantage_abs_mean": 0.24256180475155512,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.24256180475155512,
|
|
"eval_signal/advantage_pre_scale_std": 0.285640483101209,
|
|
"eval_signal/advantage_std": 0.285640483101209,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2427068774898847,
|
|
"eval_signal/brier_reward/group_std_mean": 0.3084608018398285,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030338359686235588,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.030338359686235588,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0698917464663585,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1197894203166167,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008736468308294812,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008736468308294812,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.04085286473855376,
|
|
"eval_signal/format_reward/group_std_mean": 0.09480424628903468,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.5555555721124014,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.02042643236927688,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.02042643236927688,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.00753099766249458,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.016231194448967774,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00013480485601273054,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00013480485601273054,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.1466109355290731,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.2583803633848826,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026243358151987195,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026243358151987195,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.1466109355290731,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.2583803633848826,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026243358151987195,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026243358151987195,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.12884864211082458,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.23108715812365213,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002306390592517952,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002306390592517952,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.10982246572772662,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.16773877292871475,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019658220505031445,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019658220505031445,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.21963715553283691,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.26794109493494034,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003931504984696706,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003931504984696706,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.1466109355290731,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.2583803633848826,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026243358151987195,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026243358151987195,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.011491452964643637,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.01822316941494743,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014364316205804546,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014364316205804546,
|
|
"eval_steps_per_second": 0.028,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21459663503852902,
|
|
"calibration/batch_distribution_entropy": 0.771657696018665,
|
|
"calibration/buffer_distribution_entropy": 0.8062133912870209,
|
|
"calibration/confidence_entropy": 0.39023679136762507,
|
|
"calibration/coverage@0%": 0.005278506853310003,
|
|
"calibration/coverage@1%": 0.005278506853310003,
|
|
"calibration/coverage@10%": 0.07723617881098196,
|
|
"calibration/coverage@15%": 0.33807708327146657,
|
|
"calibration/coverage@20%": 0.4888472274299046,
|
|
"calibration/coverage@25%": 0.7491479359146425,
|
|
"calibration/coverage@30%": 0.9136024663583718,
|
|
"calibration/coverage@5%": 0.005278506853310003,
|
|
"calibration/ece": 0.12685333087756695,
|
|
"calibration/mean_confidence": 0.7578144496622505,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015451388888888884,
|
|
"completions/max_length": 3512.2,
|
|
"completions/max_terminated_length": 3512.2,
|
|
"completions/mean_length": 889.34462890625,
|
|
"completions/mean_terminated_length": 903.3022705078125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 253.0,
|
|
"epoch": 0.491993850076874,
|
|
"grad_norm": 0.0003861828299704939,
|
|
"learning_rate": 9.036144578313253e-08,
|
|
"loss": -0.0133,
|
|
"num_tokens": 489301046.0,
|
|
"reward": 1.0838479042053222,
|
|
"reward_std": 0.13164580911397933,
|
|
"rewards/accuracy_reward": 0.7376736283302308,
|
|
"rewards/brier_reward": 0.817084789276123,
|
|
"rewards/confidence_uniqueness_reward": 0.922769570350647,
|
|
"rewards/format_reward": 0.9843750119209289,
|
|
"rewards/frontier_aurc_reward": -0.0028617044910788534,
|
|
"rewards/frontier_coverage_1": 0.0036607160232961177,
|
|
"rewards/frontier_coverage_10": 0.0036607160232961177,
|
|
"rewards/frontier_coverage_15": 0.008217979548498988,
|
|
"rewards/frontier_coverage_20": 0.04692377373576164,
|
|
"rewards/frontier_coverage_25": 0.19758630394935608,
|
|
"rewards/frontier_coverage_5": 0.0036607160232961177,
|
|
"rewards/frontier_ece_reward": 0.0053809239529073235,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14232856035232544,
|
|
"signal/accuracy_reward/group_std_mean": 0.19213833510875702,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.44722222089767455,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07116428017616272,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07116428017616272,
|
|
"signal/advantage_abs_mean": 0.09252178370952606,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09252178370952606,
|
|
"signal/advantage_pre_scale_std": 0.17391646802425384,
|
|
"signal/advantage_std": 0.17391646802425384,
|
|
"signal/brier_reward/centered_abs_mean": 0.13427632600069045,
|
|
"signal/brier_reward/group_std_mean": 0.1779688835144043,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016784540750086306,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016784540750086306,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04697767570614815,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07451951429247856,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005872209463268519,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005872209463268519,
|
|
"signal/format_reward/centered_abs_mean": 0.025531684421002863,
|
|
"signal/format_reward/group_std_mean": 0.04952741749584675,
|
|
"signal/format_reward/group_zero_std_frac": 0.7916666626930237,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012765842210501432,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012765842210501432,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0039027729537338017,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00706147076562047,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.985963555052876e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.985963555052876e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11220380365848541,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15874993205070495,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020084480987861753,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020084480987861753,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11220380365848541,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15874993205070495,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020084480987861753,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020084480987861753,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09757517576217652,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13811067789793013,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017465956043452024,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017465956043452024,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0743746891617775,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10126109570264816,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013313068542629481,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013313068542629481,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12186015099287033,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15898796319961547,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002181296655908227,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002181296655908227,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11220380365848541,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15874993205070495,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020084480987861753,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020084480987861753,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009247669950127602,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01250618938356638,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011559587437659502,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011559587437659502,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1501536281365257,
|
|
"calibration/batch_distribution_entropy": 0.7866720396909507,
|
|
"calibration/buffer_distribution_entropy": 0.8006953548191152,
|
|
"calibration/confidence_entropy": 0.4023190237959316,
|
|
"calibration/coverage@0%": 0.04461942257217847,
|
|
"calibration/coverage@1%": 0.04461942257217847,
|
|
"calibration/coverage@10%": 0.2860892388451443,
|
|
"calibration/coverage@15%": 0.458953429250663,
|
|
"calibration/coverage@20%": 0.7403910898572234,
|
|
"calibration/coverage@25%": 0.9528406462281219,
|
|
"calibration/coverage@30%": 1.0,
|
|
"calibration/coverage@5%": 0.19072615923009625,
|
|
"calibration/ece": 0.08832369333942824,
|
|
"calibration/mean_confidence": 0.7594015624133713,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011574074074074106,
|
|
"completions/max_length": 3409.3333333333335,
|
|
"completions/max_terminated_length": 3409.3333333333335,
|
|
"completions/mean_length": 896.012451171875,
|
|
"completions/mean_terminated_length": 906.8702392578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 265.3333333333333,
|
|
"epoch": 0.49919376007799904,
|
|
"num_tokens": 497360332.0,
|
|
"reward": 1.0603783528010051,
|
|
"reward_std": 0.13254794230063757,
|
|
"rewards/accuracy_reward": 0.6896701256434122,
|
|
"rewards/brier_reward": 0.7949030995368958,
|
|
"rewards/confidence_uniqueness_reward": 0.9295975764592489,
|
|
"rewards/format_reward": 0.9884259303410848,
|
|
"rewards/frontier_aurc_reward": -0.00355257714788119,
|
|
"rewards/frontier_coverage_1": 0.011394298480202755,
|
|
"rewards/frontier_coverage_10": 0.010870846764494976,
|
|
"rewards/frontier_coverage_15": 0.014215116699536642,
|
|
"rewards/frontier_coverage_20": 0.05826817204554876,
|
|
"rewards/frontier_coverage_25": 0.19660960137844086,
|
|
"rewards/frontier_coverage_5": 0.011394298480202755,
|
|
"rewards/frontier_ece_reward": 0.0032965668166677156,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15244321525096893,
|
|
"signal/accuracy_reward/group_std_mean": 0.2041437178850174,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4166666865348816,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07622160762548447,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07622160762548447,
|
|
"signal/advantage_abs_mean": 0.09584795186916988,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09584795186916988,
|
|
"signal/advantage_pre_scale_std": 0.16963096956411997,
|
|
"signal/advantage_std": 0.16963096956411997,
|
|
"signal/brier_reward/centered_abs_mean": 0.14546114454666773,
|
|
"signal/brier_reward/group_std_mean": 0.1875475843747457,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018182643068333466,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018182643068333466,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.041573978339632355,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06479879096150398,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005196747292454044,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005196747292454044,
|
|
"signal/format_reward/centered_abs_mean": 0.020055700559169054,
|
|
"signal/format_reward/group_std_mean": 0.03889835067093372,
|
|
"signal/format_reward/group_zero_std_frac": 0.8379629651705424,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010027850279584527,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010027850279584527,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00507251297434171,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.008938198909163475,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.07979726131695e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.07979726131695e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11825272192557652,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16622615853945413,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002116723839814464,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002116723839814464,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11266253391901652,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15934370954831442,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002016659282768766,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002016659282768766,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09346077839533488,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13282916943232217,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016729478569080432,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016729478569080432,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07203005999326706,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09641539553801219,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012893380674843986,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012893380674843986,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13922655334075293,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.17964440087477365,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024921553364644447,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024921553364644447,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11825272192557652,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16622615853945413,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002116723839814464,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002116723839814464,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009944108004371325,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.013924115958313147,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012430135005464156,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012430135005464156,
|
|
"step": 208,
|
|
"total_flos": 0.0,
|
|
"train_loss": -0.00542832244760715,
|
|
"train_runtime": 18893.3969,
|
|
"train_samples_per_second": 0.794,
|
|
"train_steps_per_second": 0.011
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 208,
|
|
"num_input_tokens_seen": 497360332,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 6,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|