{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.49919376007799904, "eval_steps": 50, "global_step": 208, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.47117773098793353, "calibration/batch_distribution_entropy": 0.2859162943204464, "calibration/buffer_distribution_entropy": 0.29128966207738405, "calibration/confidence_entropy": 0.22040658035376542, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4490633383918142, "calibration/mean_confidence": 0.9153706387891617, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019357638888888907, "completions/max_length": 3998.4, "completions/max_terminated_length": 3998.4, "completions/mean_length": 516.5592895507813, "completions/mean_terminated_length": 526.74970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011999850001874977, "grad_norm": 0.004050114192068577, "learning_rate": 5.952380952380953e-07, "loss": 0.0035, "num_tokens": 9064971.0, "reward": 0.5567546248435974, "reward_std": 0.4937511146068573, "rewards/accuracy_reward": 0.2719618022441864, "rewards/brier_reward": 0.32214988470077516, "rewards/confidence_uniqueness_reward": 0.2880812108516693, "rewards/format_reward": 0.6037326335906983, "rewards/frontier_aurc_reward": 0.16993313934653997, "rewards/frontier_coverage_1": 0.1746483048889786, "rewards/frontier_coverage_10": 0.1746483048889786, "rewards/frontier_coverage_15": 0.1746483048889786, "rewards/frontier_coverage_20": 0.1746483048889786, "rewards/frontier_coverage_25": 0.1746483048889786, "rewards/frontier_coverage_5": 0.1746483048889786, "rewards/frontier_ece_reward": 0.16663597179576756, "signal/accuracy_reward/centered_abs_mean": 0.3148274779319763, "signal/accuracy_reward/group_std_mean": 0.3746787905693054, "signal/accuracy_reward/group_zero_std_frac": 0.07500000149011612, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15741373896598815, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15741373896598815, "signal/advantage_abs_mean": 0.42672874331474303, "signal/advantage_pre_scale_abs_mean": 0.42672874331474303, "signal/advantage_pre_scale_std": 0.49877622723579407, "signal/advantage_std": 0.49877622723579407, "signal/brier_reward/centered_abs_mean": 0.3234766721725464, "signal/brier_reward/group_std_mean": 0.37651577591896057, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0404345840215683, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0404345840215683, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2356457978487015, "signal/confidence_uniqueness_reward/group_std_mean": 0.2879685640335083, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029455724731087686, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.029455724731087686, "signal/format_reward/centered_abs_mean": 0.43563910126686095, "signal/format_reward/group_std_mean": 0.4719055533409119, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.21781955063343048, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.21781955063343048, "signal/frontier_aurc_reward/centered_abs_mean": 0.19491605628281833, "signal/frontier_aurc_reward/group_std_mean": 0.22967487033456563, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0034889972681412472, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0034889972681412472, "signal/frontier_coverage_1/centered_abs_mean": 0.1973324902355671, "signal/frontier_coverage_1/group_std_mean": 0.23634177595376968, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035322514304425567, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035322514304425567, "signal/frontier_coverage_10/centered_abs_mean": 0.1973324902355671, "signal/frontier_coverage_10/group_std_mean": 0.23634177595376968, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035322514304425567, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035322514304425567, "signal/frontier_coverage_15/centered_abs_mean": 0.1973324902355671, "signal/frontier_coverage_15/group_std_mean": 0.23634177595376968, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035322514304425567, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035322514304425567, "signal/frontier_coverage_20/centered_abs_mean": 0.1973324902355671, "signal/frontier_coverage_20/group_std_mean": 0.23634177595376968, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035322514304425567, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035322514304425567, "signal/frontier_coverage_25/centered_abs_mean": 0.1973324902355671, "signal/frontier_coverage_25/group_std_mean": 0.23634177595376968, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035322514304425567, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035322514304425567, "signal/frontier_coverage_5/centered_abs_mean": 0.1973324902355671, "signal/frontier_coverage_5/group_std_mean": 0.23634177595376968, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035322514304425567, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035322514304425567, "signal/frontier_ece_reward/centered_abs_mean": 0.2454825758934021, "signal/frontier_ece_reward/group_std_mean": 0.2889118641614914, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030685321986675264, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030685321986675264, "step": 5 }, { "calibration/aurc": 0.4703947084269996, "calibration/batch_distribution_entropy": 0.2489212385632244, "calibration/buffer_distribution_entropy": 0.2842347502223941, "calibration/confidence_entropy": 0.222495853565373, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4272499701997674, "calibration/mean_confidence": 0.9193827270131825, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017795138888888885, "completions/max_length": 4050.8, "completions/max_terminated_length": 4050.8, "completions/mean_length": 477.50877685546874, "completions/mean_terminated_length": 486.2249816894531, "completions/min_length": 0.0, "completions/min_terminated_length": 15.0, "epoch": 0.023999700003749954, "grad_norm": 0.004755768924951553, "learning_rate": 1.1904761904761906e-06, "loss": 0.0049, "num_tokens": 17648592.0, "reward": 0.5936285734176636, "reward_std": 0.42507994174957275, "rewards/accuracy_reward": 0.3043402791023254, "rewards/brier_reward": 0.36583648920059203, "rewards/confidence_uniqueness_reward": 0.35002759099006653, "rewards/format_reward": 0.7091145873069763, "rewards/frontier_aurc_reward": -0.006095233652740717, "rewards/frontier_coverage_1": 0.005141327064484358, "rewards/frontier_coverage_10": 0.005141327064484358, "rewards/frontier_coverage_15": 0.005141327064484358, "rewards/frontier_coverage_20": 0.005141327064484358, "rewards/frontier_coverage_25": 0.005141327064484358, "rewards/frontier_coverage_5": 0.005141327064484358, "rewards/frontier_ece_reward": -0.02419957034289837, "signal/accuracy_reward/centered_abs_mean": 0.3331814229488373, "signal/accuracy_reward/group_std_mean": 0.38797804713249207, "signal/accuracy_reward/group_zero_std_frac": 0.08055555745959282, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16659071147441865, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.16659071147441865, "signal/advantage_abs_mean": 0.3555244505405426, "signal/advantage_pre_scale_abs_mean": 0.3555244505405426, "signal/advantage_pre_scale_std": 0.42819578647613527, "signal/advantage_std": 0.42819578647613527, "signal/brier_reward/centered_abs_mean": 0.3271039307117462, "signal/brier_reward/group_std_mean": 0.3775700032711029, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.040887991338968276, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.040887991338968276, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.22568395733833313, "signal/confidence_uniqueness_reward/group_std_mean": 0.2805815994739532, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02821049466729164, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02821049466729164, "signal/format_reward/centered_abs_mean": 0.36475151777267456, "signal/format_reward/group_std_mean": 0.4275496780872345, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.18237575888633728, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.18237575888633728, "signal/frontier_aurc_reward/centered_abs_mean": 0.006412158068269491, "signal/frontier_aurc_reward/group_std_mean": 0.008681737259030342, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.000114777623093687, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.000114777623093687, "signal/frontier_coverage_1/centered_abs_mean": 0.015101977251470089, "signal/frontier_coverage_1/group_std_mean": 0.03188966251909733, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00027032537909690293, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00027032537909690293, "signal/frontier_coverage_10/centered_abs_mean": 0.015101977251470089, "signal/frontier_coverage_10/group_std_mean": 0.03188966251909733, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00027032537909690293, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00027032537909690293, "signal/frontier_coverage_15/centered_abs_mean": 0.015101977251470089, "signal/frontier_coverage_15/group_std_mean": 0.03188966251909733, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00027032537909690293, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00027032537909690293, "signal/frontier_coverage_20/centered_abs_mean": 0.015101977251470089, "signal/frontier_coverage_20/group_std_mean": 0.03188966251909733, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00027032537909690293, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00027032537909690293, "signal/frontier_coverage_25/centered_abs_mean": 0.015101977251470089, "signal/frontier_coverage_25/group_std_mean": 0.03188966251909733, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00027032537909690293, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00027032537909690293, "signal/frontier_coverage_5/centered_abs_mean": 0.015101977251470089, "signal/frontier_coverage_5/group_std_mean": 0.03188966251909733, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00027032537909690293, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00027032537909690293, "signal/frontier_ece_reward/centered_abs_mean": 0.1461539089679718, "signal/frontier_ece_reward/group_std_mean": 0.1712912440299988, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.018269238620996477, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.018269238620996477, "step": 10 }, { "calibration/aurc": 0.5425230736344001, "calibration/batch_distribution_entropy": 0.2866005839972722, "calibration/buffer_distribution_entropy": 0.27346687050298973, "calibration/confidence_entropy": 0.24153054751308325, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5060684725221445, "calibration/mean_confidence": 0.9123262424801549, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012586805555555558, "completions/max_length": 3972.0, "completions/max_terminated_length": 3972.0, "completions/mean_length": 429.3203125, "completions/mean_terminated_length": 434.8564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 32.0, "epoch": 0.03599955000562493, "grad_norm": 0.0030024258885532618, "learning_rate": 1.7857142857142859e-06, "loss": -0.0073, "num_tokens": 25696346.0, "reward": 0.7226181745529174, "reward_std": 0.328831684589386, "rewards/accuracy_reward": 0.32343749403953553, "rewards/brier_reward": 0.4225196659564972, "rewards/confidence_uniqueness_reward": 0.4882237255573273, "rewards/format_reward": 0.909375011920929, "rewards/frontier_aurc_reward": -0.007289860583841801, "rewards/frontier_coverage_1": 0.008264282252639532, "rewards/frontier_coverage_10": 0.008264282252639532, "rewards/frontier_coverage_15": 0.008264282252639532, "rewards/frontier_coverage_20": 0.008264282252639532, "rewards/frontier_coverage_25": 0.008264282252639532, "rewards/frontier_coverage_5": 0.008264282252639532, "rewards/frontier_ece_reward": -0.06710481271147728, "signal/accuracy_reward/centered_abs_mean": 0.32534722089767454, "signal/accuracy_reward/group_std_mean": 0.38704482316970823, "signal/accuracy_reward/group_zero_std_frac": 0.05000000149011612, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16267361044883727, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.16267361044883727, "signal/advantage_abs_mean": 0.2616334229707718, "signal/advantage_pre_scale_abs_mean": 0.2616334229707718, "signal/advantage_pre_scale_std": 0.3352875530719757, "signal/advantage_std": 0.3352875530719757, "signal/brier_reward/centered_abs_mean": 0.302908456325531, "signal/brier_reward/group_std_mean": 0.356386786699295, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03786355704069137, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03786355704069137, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.19381275475025178, "signal/confidence_uniqueness_reward/group_std_mean": 0.24320927560329436, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024226594343781473, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.024226594343781473, "signal/format_reward/centered_abs_mean": 0.14767795130610467, "signal/format_reward/group_std_mean": 0.2351602092385292, "signal/format_reward/group_zero_std_frac": 0.19444444738328456, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.07383897565305234, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.07383897565305234, "signal/frontier_aurc_reward/centered_abs_mean": 0.005967351235449314, "signal/frontier_aurc_reward/group_std_mean": 0.007935958448797464, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00010681558633223176, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00010681558633223176, "signal/frontier_coverage_1/centered_abs_mean": 0.01820983216166496, "signal/frontier_coverage_1/group_std_mean": 0.035216915607452395, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00032595600350759923, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00032595600350759923, "signal/frontier_coverage_10/centered_abs_mean": 0.01820983216166496, "signal/frontier_coverage_10/group_std_mean": 0.035216915607452395, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00032595600350759923, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00032595600350759923, "signal/frontier_coverage_15/centered_abs_mean": 0.01820983216166496, "signal/frontier_coverage_15/group_std_mean": 0.035216915607452395, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00032595600350759923, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00032595600350759923, "signal/frontier_coverage_20/centered_abs_mean": 0.01820983216166496, "signal/frontier_coverage_20/group_std_mean": 0.035216915607452395, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00032595600350759923, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00032595600350759923, "signal/frontier_coverage_25/centered_abs_mean": 0.01820983216166496, "signal/frontier_coverage_25/group_std_mean": 0.035216915607452395, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00032595600350759923, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00032595600350759923, "signal/frontier_coverage_5/centered_abs_mean": 0.01820983216166496, "signal/frontier_coverage_5/group_std_mean": 0.035216915607452395, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00032595600350759923, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00032595600350759923, "signal/frontier_ece_reward/centered_abs_mean": 0.16836784183979034, "signal/frontier_ece_reward/group_std_mean": 0.1972884237766266, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.021045980229973792, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.021045980229973792, "step": 15 }, { "calibration/aurc": 0.49082180069506387, "calibration/batch_distribution_entropy": 0.3908793649822321, "calibration/buffer_distribution_entropy": 0.2954284965643815, "calibration/confidence_entropy": 0.312221649530802, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0356020942408377, "calibration/coverage@30%": 0.07853403141361257, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4198799349990739, "calibration/mean_confidence": 0.883742897278178, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009114583333333325, "completions/max_length": 3640.0, "completions/max_terminated_length": 3640.0, "completions/mean_length": 430.2876708984375, "completions/mean_terminated_length": 434.2104797363281, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.04799940000749991, "grad_norm": 0.0022384580224752426, "learning_rate": 2.380952380952381e-06, "loss": -0.0087, "num_tokens": 33766956.0, "reward": 0.8248475670814515, "reward_std": 0.26103408336639405, "rewards/accuracy_reward": 0.39869791865348814, "rewards/brier_reward": 0.521475088596344, "rewards/confidence_uniqueness_reward": 0.5816608428955078, "rewards/format_reward": 0.9853298544883728, "rewards/frontier_aurc_reward": -0.0063677155412733555, "rewards/frontier_coverage_1": 0.009802600927650928, "rewards/frontier_coverage_10": 0.009802600927650928, "rewards/frontier_coverage_15": 0.009802600927650928, "rewards/frontier_coverage_20": 0.009802600927650928, "rewards/frontier_coverage_25": 0.009802600927650928, "rewards/frontier_coverage_5": 0.009802600927650928, "rewards/frontier_ece_reward": -0.04797694368753582, "signal/accuracy_reward/centered_abs_mean": 0.3026530027389526, "signal/accuracy_reward/group_std_mean": 0.3679898679256439, "signal/accuracy_reward/group_zero_std_frac": 0.0722222238779068, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1513265013694763, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1513265013694763, "signal/advantage_abs_mean": 0.20957765579223633, "signal/advantage_pre_scale_abs_mean": 0.20957765579223633, "signal/advantage_pre_scale_std": 0.269910192489624, "signal/advantage_std": 0.269910192489624, "signal/brier_reward/centered_abs_mean": 0.26383275985717775, "signal/brier_reward/group_std_mean": 0.31845086216926577, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03297909498214722, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03297909498214722, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.19037654995918274, "signal/confidence_uniqueness_reward/group_std_mean": 0.22486165761947632, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023797068744897842, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023797068744897842, "signal/format_reward/centered_abs_mean": 0.027153862826526164, "signal/format_reward/group_std_mean": 0.061953308433294295, "signal/format_reward/group_zero_std_frac": 0.7055555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013576931413263082, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013576931413263082, "signal/frontier_aurc_reward/centered_abs_mean": 0.00443207286298275, "signal/frontier_aurc_reward/group_std_mean": 0.006194448843598366, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.9334105248563e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.9334105248563e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.026542419195175172, "signal/frontier_coverage_1/group_std_mean": 0.04689379408955574, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0004751092870719731, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0004751092870719731, "signal/frontier_coverage_10/centered_abs_mean": 0.026542419195175172, "signal/frontier_coverage_10/group_std_mean": 0.04689379408955574, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0004751092870719731, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0004751092870719731, "signal/frontier_coverage_15/centered_abs_mean": 0.026542419195175172, "signal/frontier_coverage_15/group_std_mean": 0.04689379408955574, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0004751092870719731, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0004751092870719731, "signal/frontier_coverage_20/centered_abs_mean": 0.026542419195175172, "signal/frontier_coverage_20/group_std_mean": 0.04689379408955574, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0004751092870719731, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0004751092870719731, "signal/frontier_coverage_25/centered_abs_mean": 0.026542419195175172, "signal/frontier_coverage_25/group_std_mean": 0.04689379408955574, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0004751092870719731, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0004751092870719731, "signal/frontier_coverage_5/centered_abs_mean": 0.026542419195175172, "signal/frontier_coverage_5/group_std_mean": 0.04689379408955574, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0004751092870719731, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0004751092870719731, "signal/frontier_ece_reward/centered_abs_mean": 0.1626291185617447, "signal/frontier_ece_reward/group_std_mean": 0.19648596048355102, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.020328639820218087, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.020328639820218087, "step": 20 }, { "calibration/aurc": 0.39173247160966357, "calibration/batch_distribution_entropy": 0.5750020929892202, "calibration/buffer_distribution_entropy": 0.37760437579184847, "calibration/confidence_entropy": 0.39650951222978464, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.06945169712793733, "calibration/coverage@25%": 0.11382180156657964, "calibration/coverage@30%": 0.20625, "calibration/coverage@5%": 0.0, "calibration/ece": 0.2721868572108504, "calibration/mean_confidence": 0.8325929617455735, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010937499999999978, "completions/max_length": 3833.0, "completions/max_terminated_length": 3833.0, "completions/mean_length": 463.96650390625, "completions/mean_terminated_length": 469.0935791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 100.4, "epoch": 0.05999925000937488, "grad_norm": 0.0008329463307745755, "learning_rate": 2.9761904761904763e-06, "loss": -0.0079, "num_tokens": 42236298.0, "reward": 0.9248562097549439, "reward_std": 0.2358280152082443, "rewards/accuracy_reward": 0.5243923544883728, "rewards/brier_reward": 0.6481992244720459, "rewards/confidence_uniqueness_reward": 0.6879818558692932, "rewards/format_reward": 0.9878472208976745, "rewards/frontier_aurc_reward": -0.004981133854016662, "rewards/frontier_coverage_1": 0.002557537937536836, "rewards/frontier_coverage_10": 0.002557537937536836, "rewards/frontier_coverage_15": 0.002557537937536836, "rewards/frontier_coverage_20": 0.002557537937536836, "rewards/frontier_coverage_25": 0.002557537937536836, "rewards/frontier_coverage_5": 0.002557537937536836, "rewards/frontier_ece_reward": 0.012226011976599694, "signal/accuracy_reward/centered_abs_mean": 0.28567166328430177, "signal/accuracy_reward/group_std_mean": 0.35009088516235354, "signal/accuracy_reward/group_zero_std_frac": 0.11388889104127883, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14283583164215088, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14283583164215088, "signal/advantage_abs_mean": 0.1873850554227829, "signal/advantage_pre_scale_abs_mean": 0.1873850554227829, "signal/advantage_pre_scale_std": 0.2495607316493988, "signal/advantage_std": 0.2495607316493988, "signal/brier_reward/centered_abs_mean": 0.21691001057624817, "signal/brier_reward/group_std_mean": 0.26915258169174194, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027113751322031022, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.027113751322031022, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.11467475891113281, "signal/confidence_uniqueness_reward/group_std_mean": 0.14529187828302384, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014334344863891601, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014334344863891601, "signal/format_reward/centered_abs_mean": 0.021853298880159855, "signal/format_reward/group_std_mean": 0.0452168170362711, "signal/format_reward/group_zero_std_frac": 0.8, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010926649440079927, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010926649440079927, "signal/frontier_aurc_reward/centered_abs_mean": 0.0036150857340544462, "signal/frontier_aurc_reward/group_std_mean": 0.005566043313592672, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.471003143815324e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.471003143815324e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.040765144675970075, "signal/frontier_coverage_1/group_std_mean": 0.062347762286663055, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0007296960451640188, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0007296960451640188, "signal/frontier_coverage_10/centered_abs_mean": 0.040765144675970075, "signal/frontier_coverage_10/group_std_mean": 0.062347762286663055, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0007296960451640188, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007296960451640188, "signal/frontier_coverage_15/centered_abs_mean": 0.040765144675970075, "signal/frontier_coverage_15/group_std_mean": 0.062347762286663055, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007296960451640188, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007296960451640188, "signal/frontier_coverage_20/centered_abs_mean": 0.040765144675970075, "signal/frontier_coverage_20/group_std_mean": 0.062347762286663055, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007296960451640188, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007296960451640188, "signal/frontier_coverage_25/centered_abs_mean": 0.040765144675970075, "signal/frontier_coverage_25/group_std_mean": 0.062347762286663055, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007296960451640188, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007296960451640188, "signal/frontier_coverage_5/centered_abs_mean": 0.040765144675970075, "signal/frontier_coverage_5/group_std_mean": 0.062347762286663055, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0007296960451640188, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0007296960451640188, "signal/frontier_ece_reward/centered_abs_mean": 0.13129711151123047, "signal/frontier_ece_reward/group_std_mean": 0.16005820035934448, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.01641213893890381, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.01641213893890381, "step": 25 }, { "calibration/aurc": 0.3284406681871355, "calibration/batch_distribution_entropy": 0.6747878884432902, "calibration/buffer_distribution_entropy": 0.5584650318532487, "calibration/confidence_entropy": 0.5556384101353083, "calibration/coverage@0%": 0.0010471204188481676, "calibration/coverage@1%": 0.0010471204188481676, "calibration/coverage@10%": 0.0010471204188481676, "calibration/coverage@15%": 0.015785200324775317, "calibration/coverage@20%": 0.07140799084621567, "calibration/coverage@25%": 0.1294562021482756, "calibration/coverage@30%": 0.36101967648308186, "calibration/coverage@5%": 0.0010471204188481676, "calibration/ece": 0.12430206094720246, "calibration/mean_confidence": 0.7177475003670872, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013802083333333326, "completions/max_length": 3608.6, "completions/max_terminated_length": 3608.6, "completions/mean_length": 528.2802978515625, "completions/mean_terminated_length": 535.6919372558593, "completions/min_length": 0.0, "completions/min_terminated_length": 127.2, "epoch": 0.07199910001124986, "grad_norm": 0.0006069006049074233, "learning_rate": 3.5714285714285718e-06, "loss": -0.0105, "num_tokens": 51432007.0, "reward": 0.9590584993362427, "reward_std": 0.19429016709327698, "rewards/accuracy_reward": 0.5807291746139527, "rewards/brier_reward": 0.7223093867301941, "rewards/confidence_uniqueness_reward": 0.6810937523841858, "rewards/format_reward": 0.9850694298744201, "rewards/frontier_aurc_reward": -0.0038667929824441672, "rewards/frontier_coverage_1": -0.010892250412143766, "rewards/frontier_coverage_10": -0.010892250412143766, "rewards/frontier_coverage_15": -0.010892250412143766, "rewards/frontier_coverage_20": -0.010892250412143766, "rewards/frontier_coverage_25": -0.010892250412143766, "rewards/frontier_coverage_5": -0.010892250412143766, "rewards/frontier_ece_reward": 0.015782377682626247, "signal/accuracy_reward/centered_abs_mean": 0.23936631977558137, "signal/accuracy_reward/group_std_mean": 0.30298495292663574, "signal/accuracy_reward/group_zero_std_frac": 0.1833333343267441, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11968315988779069, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11968315988779069, "signal/advantage_abs_mean": 0.14813488423824311, "signal/advantage_pre_scale_abs_mean": 0.14813488423824311, "signal/advantage_pre_scale_std": 0.21500767171382903, "signal/advantage_std": 0.21500767171382903, "signal/brier_reward/centered_abs_mean": 0.146245139837265, "signal/brier_reward/group_std_mean": 0.18797107338905333, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018280642479658125, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018280642479658125, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.11807332634925842, "signal/confidence_uniqueness_reward/group_std_mean": 0.15081189423799515, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014759165793657303, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014759165793657303, "signal/format_reward/centered_abs_mean": 0.02557508684694767, "signal/format_reward/group_std_mean": 0.04947390109300613, "signal/format_reward/group_zero_std_frac": 0.7916666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012787543423473834, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012787543423473834, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022939105052500962, "signal/frontier_aurc_reward/group_std_mean": 0.0038691143039613963, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1060995863517745e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1060995863517745e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.0672921821475029, "signal/frontier_coverage_1/group_std_mean": 0.08672473132610321, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001204529986716807, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001204529986716807, "signal/frontier_coverage_10/centered_abs_mean": 0.0672921821475029, "signal/frontier_coverage_10/group_std_mean": 0.08672473132610321, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001204529986716807, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001204529986716807, "signal/frontier_coverage_15/centered_abs_mean": 0.0672921821475029, "signal/frontier_coverage_15/group_std_mean": 0.08672473132610321, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001204529986716807, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001204529986716807, "signal/frontier_coverage_20/centered_abs_mean": 0.0672921821475029, "signal/frontier_coverage_20/group_std_mean": 0.08672473132610321, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001204529986716807, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001204529986716807, "signal/frontier_coverage_25/centered_abs_mean": 0.0672921821475029, "signal/frontier_coverage_25/group_std_mean": 0.08672473132610321, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001204529986716807, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001204529986716807, "signal/frontier_coverage_5/centered_abs_mean": 0.0672921821475029, "signal/frontier_coverage_5/group_std_mean": 0.08672473132610321, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001204529986716807, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001204529986716807, "signal/frontier_ece_reward/centered_abs_mean": 0.058205033838748935, "signal/frontier_ece_reward/group_std_mean": 0.07591410800814628, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007275629229843617, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007275629229843617, "step": 30 }, { "calibration/aurc": 0.26403452047660375, "calibration/batch_distribution_entropy": 0.6640550979352706, "calibration/buffer_distribution_entropy": 0.6891557885929268, "calibration/confidence_entropy": 0.5629938859398411, "calibration/coverage@0%": 0.0042235751477624536, "calibration/coverage@1%": 0.0042235751477624536, "calibration/coverage@10%": 0.02877949620039404, "calibration/coverage@15%": 0.0875555012196266, "calibration/coverage@20%": 0.10297304507927574, "calibration/coverage@25%": 0.480761541889483, "calibration/coverage@30%": 0.8077846479500892, "calibration/coverage@5%": 0.0042235751477624536, "calibration/ece": 0.0715633782015198, "calibration/mean_confidence": 0.7171294939722299, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015017361111111094, "completions/max_length": 3967.2, "completions/max_terminated_length": 3967.2, "completions/mean_length": 572.1895263671875, "completions/mean_terminated_length": 580.9646606445312, "completions/min_length": 0.0, "completions/min_terminated_length": 165.4, "epoch": 0.08399895001312484, "grad_norm": 0.00052351359045133, "learning_rate": 4.166666666666667e-06, "loss": -0.011, "num_tokens": 61101070.0, "reward": 0.9819595694541932, "reward_std": 0.17260923683643342, "rewards/accuracy_reward": 0.63125, "rewards/brier_reward": 0.7531757354736328, "rewards/confidence_uniqueness_reward": 0.6459343433380127, "rewards/format_reward": 0.9839409828186035, "rewards/frontier_aurc_reward": -0.003214681288227439, "rewards/frontier_coverage_1": -0.018123492784798145, "rewards/frontier_coverage_10": -0.018123492784798145, "rewards/frontier_coverage_15": -0.018123492784798145, "rewards/frontier_coverage_20": -0.018123492784798145, "rewards/frontier_coverage_25": -0.018123492784798145, "rewards/frontier_coverage_5": -0.018123492784798145, "rewards/frontier_ece_reward": 0.011834413185715676, "signal/accuracy_reward/centered_abs_mean": 0.21002604067325592, "signal/accuracy_reward/group_std_mean": 0.26851107478141784, "signal/accuracy_reward/group_zero_std_frac": 0.272222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10501302033662796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10501302033662796, "signal/advantage_abs_mean": 0.13170475214719773, "signal/advantage_pre_scale_abs_mean": 0.13170475214719773, "signal/advantage_pre_scale_std": 0.19973941445350646, "signal/advantage_std": 0.19973941445350646, "signal/brier_reward/centered_abs_mean": 0.1260453164577484, "signal/brier_reward/group_std_mean": 0.16464770436286927, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01575566455721855, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01575566455721855, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1379134923219681, "signal/confidence_uniqueness_reward/group_std_mean": 0.16885134875774382, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01723918654024601, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01723918654024601, "signal/format_reward/centered_abs_mean": 0.02651367262005806, "signal/format_reward/group_std_mean": 0.04611495956778526, "signal/format_reward/group_zero_std_frac": 0.8222222208976746, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01325683631002903, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01325683631002903, "signal/frontier_aurc_reward/centered_abs_mean": 0.0030739160254597666, "signal/frontier_aurc_reward/group_std_mean": 0.005279354751110077, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.5023095046635714e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.5023095046635714e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.06398859769105911, "signal/frontier_coverage_1/group_std_mean": 0.08186554163694382, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001145395915955305, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001145395915955305, "signal/frontier_coverage_10/centered_abs_mean": 0.06398859769105911, "signal/frontier_coverage_10/group_std_mean": 0.08186554163694382, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001145395915955305, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001145395915955305, "signal/frontier_coverage_15/centered_abs_mean": 0.06398859769105911, "signal/frontier_coverage_15/group_std_mean": 0.08186554163694382, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001145395915955305, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001145395915955305, "signal/frontier_coverage_20/centered_abs_mean": 0.06398859769105911, "signal/frontier_coverage_20/group_std_mean": 0.08186554163694382, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001145395915955305, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001145395915955305, "signal/frontier_coverage_25/centered_abs_mean": 0.06398859769105911, "signal/frontier_coverage_25/group_std_mean": 0.08186554163694382, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001145395915955305, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001145395915955305, "signal/frontier_coverage_5/centered_abs_mean": 0.06398859769105911, "signal/frontier_coverage_5/group_std_mean": 0.08186554163694382, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001145395915955305, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001145395915955305, "signal/frontier_ece_reward/centered_abs_mean": 0.028756240755319594, "signal/frontier_ece_reward/group_std_mean": 0.038216957822442055, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035945300944149492, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035945300944149492, "step": 35 }, { "calibration/aurc": 0.2924937155019407, "calibration/batch_distribution_entropy": 0.6771849554275204, "calibration/buffer_distribution_entropy": 0.6937744894431359, "calibration/confidence_entropy": 0.4959900350122277, "calibration/coverage@0%": 0.0026329676433828634, "calibration/coverage@1%": 0.0026329676433828634, "calibration/coverage@10%": 0.03893218024180806, "calibration/coverage@15%": 0.07830225898196555, "calibration/coverage@20%": 0.17442651665930492, "calibration/coverage@25%": 0.344083234092524, "calibration/coverage@30%": 0.4509267354187839, "calibration/coverage@5%": 0.0026329676433828634, "calibration/ece": 0.1226596169961736, "calibration/mean_confidence": 0.7687242431311752, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014236111111111093, "completions/max_length": 3888.2, "completions/max_terminated_length": 3888.2, "completions/mean_length": 601.69931640625, "completions/mean_terminated_length": 610.4102172851562, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.09599880001499982, "grad_norm": 0.0005529921618290246, "learning_rate": 4.761904761904762e-06, "loss": -0.0114, "num_tokens": 71152166.0, "reward": 0.9981375932693481, "reward_std": 0.1639564424753189, "rewards/accuracy_reward": 0.6465277910232544, "rewards/brier_reward": 0.7522081136703491, "rewards/confidence_uniqueness_reward": 0.7125440120697022, "rewards/format_reward": 0.9847222208976746, "rewards/frontier_aurc_reward": -0.005940702743828297, "rewards/frontier_coverage_1": -0.01573992893099785, "rewards/frontier_coverage_10": -0.01573992893099785, "rewards/frontier_coverage_15": -0.01573992893099785, "rewards/frontier_coverage_20": -0.014948921743780375, "rewards/frontier_coverage_25": -0.012497423123568297, "rewards/frontier_coverage_5": -0.01573992893099785, "rewards/frontier_ece_reward": 0.009145193360745906, "signal/accuracy_reward/centered_abs_mean": 0.1869140625, "signal/accuracy_reward/group_std_mean": 0.2490226775407791, "signal/accuracy_reward/group_zero_std_frac": 0.2916666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09345703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09345703125, "signal/advantage_abs_mean": 0.11943778544664382, "signal/advantage_pre_scale_abs_mean": 0.11943778544664382, "signal/advantage_pre_scale_std": 0.19496967792510986, "signal/advantage_std": 0.19496967792510986, "signal/brier_reward/centered_abs_mean": 0.13451988697052003, "signal/brier_reward/group_std_mean": 0.17633683681488038, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016814985871315004, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016814985871315004, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.11400759369134902, "signal/confidence_uniqueness_reward/group_std_mean": 0.14184125363826752, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014250949211418628, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014250949211418628, "signal/format_reward/centered_abs_mean": 0.02620442695915699, "signal/format_reward/group_std_mean": 0.04804914817214012, "signal/format_reward/group_zero_std_frac": 0.8083333611488343, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013102213479578495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013102213479578495, "signal/frontier_aurc_reward/centered_abs_mean": 0.008588980231434108, "signal/frontier_aurc_reward/group_std_mean": 0.014885761030018329, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00015374274080386384, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00015374274080386384, "signal/frontier_coverage_1/centered_abs_mean": 0.05871818587183952, "signal/frontier_coverage_1/group_std_mean": 0.07801110148429871, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0010510555002838373, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0010510555002838373, "signal/frontier_coverage_10/centered_abs_mean": 0.05871818587183952, "signal/frontier_coverage_10/group_std_mean": 0.07801110148429871, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0010510555002838373, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010510555002838373, "signal/frontier_coverage_15/centered_abs_mean": 0.05871818587183952, "signal/frontier_coverage_15/group_std_mean": 0.07801110148429871, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010510555002838373, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010510555002838373, "signal/frontier_coverage_20/centered_abs_mean": 0.05651564598083496, "signal/frontier_coverage_20/group_std_mean": 0.07526759058237076, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010116300079971551, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010116300079971551, "signal/frontier_coverage_25/centered_abs_mean": 0.04918262958526611, "signal/frontier_coverage_25/group_std_mean": 0.06600879728794098, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008803689968772232, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008803689968772232, "signal/frontier_coverage_5/centered_abs_mean": 0.05871818587183952, "signal/frontier_coverage_5/group_std_mean": 0.07801110148429871, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0010510555002838373, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0010510555002838373, "signal/frontier_ece_reward/centered_abs_mean": 0.017127741128206253, "signal/frontier_ece_reward/group_std_mean": 0.02196224555373192, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021409676410257816, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021409676410257816, "step": 40 }, { "calibration/aurc": 0.21364178122642002, "calibration/batch_distribution_entropy": 0.6522730345221792, "calibration/buffer_distribution_entropy": 0.6968874334968922, "calibration/confidence_entropy": 0.4419758190916566, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.06578919131585535, "calibration/coverage@15%": 0.10395714068225181, "calibration/coverage@20%": 0.5104134577073929, "calibration/coverage@25%": 0.7441382057354524, "calibration/coverage@30%": 0.963322553686276, "calibration/coverage@5%": 0.0, "calibration/ece": 0.09702895701947509, "calibration/mean_confidence": 0.788752123718866, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01449652777777779, "completions/max_length": 3590.6, "completions/max_terminated_length": 3590.6, "completions/mean_length": 638.76650390625, "completions/mean_terminated_length": 648.3088623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.1079986500168748, "grad_norm": 0.0009035434923134744, "learning_rate": 4.909638554216868e-06, "loss": -0.0116, "num_tokens": 81646020.0, "reward": 1.009622061252594, "reward_std": 0.16088135838508605, "rewards/accuracy_reward": 0.6539930462837219, "rewards/brier_reward": 0.7586196541786194, "rewards/confidence_uniqueness_reward": 0.7592846035957337, "rewards/format_reward": 0.9852430701255799, "rewards/frontier_aurc_reward": -0.007242204900830984, "rewards/frontier_coverage_1": -0.006770293042063713, "rewards/frontier_coverage_10": -0.006770293042063713, "rewards/frontier_coverage_15": -0.00660779308527708, "rewards/frontier_coverage_20": -0.0016520024975761772, "rewards/frontier_coverage_25": 0.002179227757733315, "rewards/frontier_coverage_5": -0.006770293042063713, "rewards/frontier_ece_reward": 0.006944606266915798, "signal/accuracy_reward/centered_abs_mean": 0.18404948115348815, "signal/accuracy_reward/group_std_mean": 0.24551969170570373, "signal/accuracy_reward/group_zero_std_frac": 0.2944444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09202474057674408, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09202474057674408, "signal/advantage_abs_mean": 0.11716565787792206, "signal/advantage_pre_scale_abs_mean": 0.11716565787792206, "signal/advantage_pre_scale_std": 0.19196827709674835, "signal/advantage_std": 0.19196827709674835, "signal/brier_reward/centered_abs_mean": 0.14656473994255065, "signal/brier_reward/group_std_mean": 0.1906241148710251, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01832059249281883, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01832059249281883, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08014513701200485, "signal/confidence_uniqueness_reward/group_std_mean": 0.10737452805042266, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010018142126500606, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010018142126500606, "signal/format_reward/centered_abs_mean": 0.024522569589316846, "signal/format_reward/group_std_mean": 0.044725016504526136, "signal/format_reward/group_zero_std_frac": 0.8194444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012261284794658423, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012261284794658423, "signal/frontier_aurc_reward/centered_abs_mean": 0.01112304050475359, "signal/frontier_aurc_reward/group_std_mean": 0.019053217209875583, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00019910241826437414, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00019910241826437414, "signal/frontier_coverage_1/centered_abs_mean": 0.062235020101070404, "signal/frontier_coverage_1/group_std_mean": 0.08576205521821975, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0011140068061649799, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0011140068061649799, "signal/frontier_coverage_10/centered_abs_mean": 0.062235020101070404, "signal/frontier_coverage_10/group_std_mean": 0.08576205521821975, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0011140068061649799, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011140068061649799, "signal/frontier_coverage_15/centered_abs_mean": 0.06180379539728165, "signal/frontier_coverage_15/group_std_mean": 0.08518806099891663, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001106287888251245, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001106287888251245, "signal/frontier_coverage_20/centered_abs_mean": 0.04614866077899933, "signal/frontier_coverage_20/group_std_mean": 0.06483671665191651, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008260609698481858, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008260609698481858, "signal/frontier_coverage_25/centered_abs_mean": 0.031763285398483276, "signal/frontier_coverage_25/group_std_mean": 0.04507193565368652, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005685627809725701, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005685627809725701, "signal/frontier_coverage_5/centered_abs_mean": 0.062235020101070404, "signal/frontier_coverage_5/group_std_mean": 0.08576205521821975, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0011140068061649799, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0011140068061649799, "signal/frontier_ece_reward/centered_abs_mean": 0.011737137474119664, "signal/frontier_ece_reward/group_std_mean": 0.015026122331619263, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001467142184264958, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001467142184264958, "step": 45 }, { "calibration/aurc": 0.38193449229887216, "calibration/batch_distribution_entropy": 0.6438885672841508, "calibration/buffer_distribution_entropy": 0.695458344727496, "calibration/confidence_entropy": 0.47561906765262674, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.032827148711615114, "calibration/coverage@25%": 0.11388592600909449, "calibration/coverage@30%": 0.27125548429792523, "calibration/coverage@5%": 0.0, "calibration/ece": 0.23039143132055712, "calibration/mean_confidence": 0.7539081380507843, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011111111111111094, "completions/max_length": 3867.6, "completions/max_terminated_length": 3867.6, "completions/mean_length": 667.6127685546875, "completions/mean_terminated_length": 675.1084838867188, "completions/min_length": 0.0, "completions/min_terminated_length": 198.4, "epoch": 0.11999850001874976, "grad_norm": 0.0005543790175579488, "learning_rate": 4.759036144578314e-06, "loss": -0.0081, "num_tokens": 92434519.0, "reward": 1.0035521984100342, "reward_std": 0.15531424283981324, "rewards/accuracy_reward": 0.6418402791023254, "rewards/brier_reward": 0.7497736096382142, "rewards/confidence_uniqueness_reward": 0.7558701634407043, "rewards/format_reward": 0.9885416626930237, "rewards/frontier_aurc_reward": -0.006629853136837482, "rewards/frontier_coverage_1": -0.007525159860961139, "rewards/frontier_coverage_10": -0.007525159860961139, "rewards/frontier_coverage_15": -0.007525159860961139, "rewards/frontier_coverage_20": -0.0047117485897615555, "rewards/frontier_coverage_25": 0.0007355780689977109, "rewards/frontier_coverage_5": -0.007525159860961139, "rewards/frontier_ece_reward": 0.007075064536184073, "signal/accuracy_reward/centered_abs_mean": 0.18183593451976776, "signal/accuracy_reward/group_std_mean": 0.23834896683692933, "signal/accuracy_reward/group_zero_std_frac": 0.3333333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09091796725988388, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09091796725988388, "signal/advantage_abs_mean": 0.11554279774427414, "signal/advantage_pre_scale_abs_mean": 0.11554279774427414, "signal/advantage_pre_scale_std": 0.18593416810035707, "signal/advantage_std": 0.18593416810035707, "signal/brier_reward/centered_abs_mean": 0.15182736814022063, "signal/brier_reward/group_std_mean": 0.1949179947376251, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01897842101752758, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01897842101752758, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07905573099851608, "signal/confidence_uniqueness_reward/group_std_mean": 0.1051044762134552, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00988196637481451, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00988196637481451, "signal/format_reward/centered_abs_mean": 0.0185546875, "signal/format_reward/group_std_mean": 0.03553221933543682, "signal/format_reward/group_zero_std_frac": 0.8472222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00927734375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00927734375, "signal/frontier_aurc_reward/centered_abs_mean": 0.00905402349308133, "signal/frontier_aurc_reward/group_std_mean": 0.014961976557970047, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00016206701402552426, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00016206701402552426, "signal/frontier_coverage_1/centered_abs_mean": 0.07501375824213027, "signal/frontier_coverage_1/group_std_mean": 0.10256523936986923, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013427462195977568, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013427462195977568, "signal/frontier_coverage_10/centered_abs_mean": 0.07501375824213027, "signal/frontier_coverage_10/group_std_mean": 0.10256523936986923, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013427462195977568, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013427462195977568, "signal/frontier_coverage_15/centered_abs_mean": 0.07501375824213027, "signal/frontier_coverage_15/group_std_mean": 0.10256523936986923, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013427462195977568, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013427462195977568, "signal/frontier_coverage_20/centered_abs_mean": 0.06658464595675469, "signal/frontier_coverage_20/group_std_mean": 0.09160058945417404, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011918651405721902, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011918651405721902, "signal/frontier_coverage_25/centered_abs_mean": 0.048682621121406554, "signal/frontier_coverage_25/group_std_mean": 0.0679876148700714, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008714188821613789, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008714188821613789, "signal/frontier_coverage_5/centered_abs_mean": 0.07501375824213027, "signal/frontier_coverage_5/group_std_mean": 0.10256523936986923, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013427462195977568, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013427462195977568, "signal/frontier_ece_reward/centered_abs_mean": 0.013041250593960284, "signal/frontier_ece_reward/group_std_mean": 0.016401969455182554, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016301563242450356, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016301563242450356, "step": 50 }, { "epoch": 0.11999850001874976, "eval_calibration/aurc": 0.2188544017493154, "eval_calibration/batch_distribution_entropy": 0.6349679723255258, "eval_calibration/buffer_distribution_entropy": 0.6764331779830802, "eval_calibration/confidence_entropy": 0.4723842949949193, "eval_calibration/coverage@0%": 0.09375, "eval_calibration/coverage@1%": 0.09375, "eval_calibration/coverage@10%": 0.18229166666666666, "eval_calibration/coverage@15%": 0.3229166666666667, "eval_calibration/coverage@20%": 0.4270833333333333, "eval_calibration/coverage@25%": 0.6510416666666666, "eval_calibration/coverage@30%": 0.9270833333333334, "eval_calibration/coverage@5%": 0.09375, "eval_calibration/ece": 0.1459895833333333, "eval_calibration/mean_confidence": 0.75015625, "eval_completions/clipped_ratio": 0.008680555555555544, "eval_completions/max_length": 2062.8333333333335, "eval_completions/max_terminated_length": 2062.8333333333335, "eval_completions/mean_length": 663.4890848795573, "eval_completions/mean_terminated_length": 669.3323974609375, "eval_completions/min_length": 89.16666666666667, "eval_completions/min_terminated_length": 255.66666666666666, "eval_loss": 0.0, "eval_num_tokens": 92434519.0, "eval_reward": 0.9972037474314371, "eval_reward_std": 0.28004638353983563, "eval_rewards/accuracy_reward": 0.6336805522441864, "eval_rewards/brier_reward": 0.7541488905747732, "eval_rewards/confidence_uniqueness_reward": 0.7219984630743662, "eval_rewards/format_reward": 0.9904513855775198, "eval_rewards/frontier_aurc_reward": -0.005396095337346196, "eval_rewards/frontier_coverage_1": -0.0035735241253860295, "eval_rewards/frontier_coverage_10": -0.0035735241253860295, "eval_rewards/frontier_coverage_15": -0.0035735241253860295, "eval_rewards/frontier_coverage_20": -0.0029388018786751977, "eval_rewards/frontier_coverage_25": 0.0015783853693089138, "eval_rewards/frontier_coverage_5": -0.0035735241253860295, "eval_rewards/frontier_ece_reward": 0.00796901163024207, "eval_runtime": 192.1708, "eval_samples_per_second": 5.204, "eval_signal/accuracy_reward/centered_abs_mean": 0.4539930572112401, "eval_signal/accuracy_reward/group_std_mean": 0.4836362848679225, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22699652860562006, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22699652860562006, "eval_signal/advantage_abs_mean": 0.25468970090150833, "eval_signal/advantage_pre_scale_abs_mean": 0.25468970090150833, "eval_signal/advantage_pre_scale_std": 0.27723759412765503, "eval_signal/advantage_std": 0.27723759412765503, "eval_signal/brier_reward/centered_abs_mean": 0.22432004163662592, "eval_signal/brier_reward/group_std_mean": 0.28245019912719727, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02804000520457824, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02804000520457824, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.10615977024038632, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.13361614073316255, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01326997128004829, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01326997128004829, "eval_signal/format_reward/centered_abs_mean": 0.018283419776707888, "eval_signal/format_reward/group_std_mean": 0.04803628381341696, "eval_signal/format_reward/group_zero_std_frac": 0.750000019868215, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009141709888353944, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.009141709888353944, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.008657781640067697, "eval_signal/frontier_aurc_reward/group_std_mean": 0.01709814602509141, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00015497428588181114, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00015497428588181114, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.09066158533096313, "eval_signal/frontier_coverage_1/group_std_mean": 0.1351415937145551, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016228424113554258, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016228424113554258, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.09066158533096313, "eval_signal/frontier_coverage_10/group_std_mean": 0.1351415937145551, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016228424113554258, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016228424113554258, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.09066158533096313, "eval_signal/frontier_coverage_15/group_std_mean": 0.1351415937145551, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016228424113554258, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016228424113554258, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.0809883214533329, "eval_signal/frontier_coverage_20/group_std_mean": 0.12148692086338997, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001449690879477809, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001449690879477809, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.05787591636180878, "eval_signal/frontier_coverage_25/group_std_mean": 0.08920721213022868, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010359788914987196, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010359788914987196, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.09066158533096313, "eval_signal/frontier_coverage_5/group_std_mean": 0.1351415937145551, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016228424113554258, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016228424113554258, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.01885589553664128, "eval_signal/frontier_ece_reward/group_std_mean": 0.023027233468989532, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00235698694208016, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00235698694208016, "eval_steps_per_second": 0.031, "step": 50 }, { "calibration/aurc": 0.26727014999321663, "calibration/batch_distribution_entropy": 0.6491827852211476, "calibration/buffer_distribution_entropy": 0.6668052837922903, "calibration/confidence_entropy": 0.45001724253185077, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.08429319371727748, "calibration/coverage@15%": 0.26649214659685866, "calibration/coverage@20%": 0.40556231460090497, "calibration/coverage@25%": 0.5127937336814622, "calibration/coverage@30%": 0.574368543535021, "calibration/coverage@5%": 0.0, "calibration/ece": 0.14787168232023692, "calibration/mean_confidence": 0.7670484147495956, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011545138888888884, "completions/max_length": 3615.6, "completions/max_terminated_length": 3615.6, "completions/mean_length": 701.3361206054688, "completions/mean_terminated_length": 709.5888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 191.4, "epoch": 0.13199835002062474, "grad_norm": 0.000779949186835438, "learning_rate": 4.60843373493976e-06, "loss": -0.0093, "num_tokens": 103594487.0, "reward": 1.0132197499275208, "reward_std": 0.15483529269695281, "rewards/accuracy_reward": 0.6579861164093017, "rewards/brier_reward": 0.7579427838325501, "rewards/confidence_uniqueness_reward": 0.7643311500549317, "rewards/format_reward": 0.98828125, "rewards/frontier_aurc_reward": -0.004797754716128111, "rewards/frontier_coverage_1": -0.014039672841317952, "rewards/frontier_coverage_10": -0.014039672841317952, "rewards/frontier_coverage_15": -0.014039672841317952, "rewards/frontier_coverage_20": -0.009265875071287155, "rewards/frontier_coverage_25": 0.0018376953317783772, "rewards/frontier_coverage_5": -0.014039672841317952, "rewards/frontier_ece_reward": 0.008207211550325156, "signal/accuracy_reward/centered_abs_mean": 0.18003472089767455, "signal/accuracy_reward/group_std_mean": 0.23699139356613158, "signal/accuracy_reward/group_zero_std_frac": 0.3305555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09001736044883728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09001736044883728, "signal/advantage_abs_mean": 0.1147344321012497, "signal/advantage_pre_scale_abs_mean": 0.1147344321012497, "signal/advantage_pre_scale_std": 0.1863584667444229, "signal/advantage_std": 0.1863584667444229, "signal/brier_reward/centered_abs_mean": 0.15073903203010558, "signal/brier_reward/group_std_mean": 0.19318909347057342, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018842379003763197, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018842379003763197, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08415258079767227, "signal/confidence_uniqueness_reward/group_std_mean": 0.11098144203424454, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010519072599709033, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010519072599709033, "signal/format_reward/centered_abs_mean": 0.01915690079331398, "signal/format_reward/group_std_mean": 0.03609803505241871, "signal/format_reward/group_zero_std_frac": 0.85, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00957845039665699, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00957845039665699, "signal/frontier_aurc_reward/centered_abs_mean": 0.006547454837709665, "signal/frontier_aurc_reward/group_std_mean": 0.010919546522200108, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00011719943722710013, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00011719943722710013, "signal/frontier_coverage_1/centered_abs_mean": 0.07958068549633027, "signal/frontier_coverage_1/group_std_mean": 0.1058678761124611, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014244942227378487, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014244942227378487, "signal/frontier_coverage_10/centered_abs_mean": 0.07958068549633027, "signal/frontier_coverage_10/group_std_mean": 0.1058678761124611, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014244942227378487, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014244942227378487, "signal/frontier_coverage_15/centered_abs_mean": 0.07958068549633027, "signal/frontier_coverage_15/group_std_mean": 0.1058678761124611, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014244942227378487, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014244942227378487, "signal/frontier_coverage_20/centered_abs_mean": 0.07063625603914261, "signal/frontier_coverage_20/group_std_mean": 0.09437974393367768, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012643889989703895, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012643889989703895, "signal/frontier_coverage_25/centered_abs_mean": 0.05389633476734161, "signal/frontier_coverage_25/group_std_mean": 0.07225526571273803, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009647443424910307, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009647443424910307, "signal/frontier_coverage_5/centered_abs_mean": 0.07958068549633027, "signal/frontier_coverage_5/group_std_mean": 0.1058678761124611, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014244942227378487, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014244942227378487, "signal/frontier_ece_reward/centered_abs_mean": 0.014524108730256557, "signal/frontier_ece_reward/group_std_mean": 0.01814715452492237, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018155135912820697, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018155135912820697, "step": 55 }, { "calibration/aurc": 0.31221768290134777, "calibration/batch_distribution_entropy": 0.6625914790645601, "calibration/buffer_distribution_entropy": 0.6597902779721573, "calibration/confidence_entropy": 0.40553370995317506, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.09553805774278215, "calibration/coverage@15%": 0.2251968503937008, "calibration/coverage@20%": 0.31968503937007875, "calibration/coverage@25%": 0.44452919947506564, "calibration/coverage@30%": 0.5899460078534031, "calibration/coverage@5%": 0.0, "calibration/ece": 0.20356478247024595, "calibration/mean_confidence": 0.792109658834686, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009375, "completions/max_length": 3821.0, "completions/max_terminated_length": 3821.0, "completions/mean_length": 738.2202392578125, "completions/mean_terminated_length": 745.2370727539062, "completions/min_length": 0.0, "completions/min_terminated_length": 237.0, "epoch": 0.14399820002249972, "grad_norm": 0.0004931938019581139, "learning_rate": 4.457831325301205e-06, "loss": -0.0091, "num_tokens": 115195360.0, "reward": 1.0030123233795165, "reward_std": 0.15792331099510193, "rewards/accuracy_reward": 0.62734375, "rewards/brier_reward": 0.744609785079956, "rewards/confidence_uniqueness_reward": 0.7936466217041016, "rewards/format_reward": 0.9903645873069763, "rewards/frontier_aurc_reward": -0.00615367041900754, "rewards/frontier_coverage_1": 0.0065983245614916084, "rewards/frontier_coverage_10": 0.0065983245614916084, "rewards/frontier_coverage_15": 0.0065983245614916084, "rewards/frontier_coverage_20": 0.007367745554074645, "rewards/frontier_coverage_25": 0.01494669746607542, "rewards/frontier_coverage_5": 0.0065983245614916084, "rewards/frontier_ece_reward": 0.008915235195308924, "signal/accuracy_reward/centered_abs_mean": 0.18585611879825592, "signal/accuracy_reward/group_std_mean": 0.24681904017925263, "signal/accuracy_reward/group_zero_std_frac": 0.29444445073604586, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09292805939912796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09292805939912796, "signal/advantage_abs_mean": 0.11603728979825974, "signal/advantage_pre_scale_abs_mean": 0.11603728979825974, "signal/advantage_pre_scale_std": 0.18821111917495728, "signal/advantage_std": 0.18821111917495728, "signal/brier_reward/centered_abs_mean": 0.1572576254606247, "signal/brier_reward/group_std_mean": 0.20556944012641906, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019657203182578086, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.019657203182578086, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08561482876539231, "signal/confidence_uniqueness_reward/group_std_mean": 0.11170679777860641, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010701853595674039, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010701853595674039, "signal/format_reward/centered_abs_mean": 0.01700846366584301, "signal/format_reward/group_std_mean": 0.03309671171009541, "signal/format_reward/group_zero_std_frac": 0.8638888835906983, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008504231832921505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008504231832921505, "signal/frontier_aurc_reward/centered_abs_mean": 0.007575357984751463, "signal/frontier_aurc_reward/group_std_mean": 0.012241183035075664, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00013559890358010306, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00013559890358010306, "signal/frontier_coverage_1/centered_abs_mean": 0.074223855137825, "signal/frontier_coverage_1/group_std_mean": 0.10750024914741516, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013286069501191379, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013286069501191379, "signal/frontier_coverage_10/centered_abs_mean": 0.074223855137825, "signal/frontier_coverage_10/group_std_mean": 0.10750024914741516, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013286069501191379, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013286069501191379, "signal/frontier_coverage_15/centered_abs_mean": 0.074223855137825, "signal/frontier_coverage_15/group_std_mean": 0.10750024914741516, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013286069501191379, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013286069501191379, "signal/frontier_coverage_20/centered_abs_mean": 0.06982820257544517, "signal/frontier_coverage_20/group_std_mean": 0.10162868052721023, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012499248143285513, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012499248143285513, "signal/frontier_coverage_25/centered_abs_mean": 0.047670333087444304, "signal/frontier_coverage_25/group_std_mean": 0.06957853436470032, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008532989420928061, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008532989420928061, "signal/frontier_coverage_5/centered_abs_mean": 0.074223855137825, "signal/frontier_coverage_5/group_std_mean": 0.10750024914741516, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013286069501191379, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013286069501191379, "signal/frontier_ece_reward/centered_abs_mean": 0.016095476038753987, "signal/frontier_ece_reward/group_std_mean": 0.020388123765587806, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020119345048442484, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020119345048442484, "step": 60 }, { "calibration/aurc": 0.2525696678555546, "calibration/batch_distribution_entropy": 0.7143370845868392, "calibration/buffer_distribution_entropy": 0.6817250857748087, "calibration/confidence_entropy": 0.4144873480218685, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.04682893809832985, "calibration/coverage@15%": 0.4208647940840097, "calibration/coverage@20%": 0.5376734252296256, "calibration/coverage@25%": 0.6471056085142334, "calibration/coverage@30%": 0.7150395778364116, "calibration/coverage@5%": 0.0, "calibration/ece": 0.15485868166194255, "calibration/mean_confidence": 0.7806578303969407, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007291666666666674, "completions/max_length": 3545.2, "completions/max_terminated_length": 3545.2, "completions/mean_length": 729.8021728515625, "completions/mean_terminated_length": 735.1742919921875, "completions/min_length": 0.0, "completions/min_terminated_length": 213.4, "epoch": 0.1559980500243747, "grad_norm": 0.0005639444570988417, "learning_rate": 4.307228915662651e-06, "loss": -0.0053, "num_tokens": 126696729.0, "reward": 1.024180245399475, "reward_std": 0.14699944406747817, "rewards/accuracy_reward": 0.6559027791023254, "rewards/brier_reward": 0.7658300518989563, "rewards/confidence_uniqueness_reward": 0.8237267851829528, "rewards/format_reward": 0.9926215410232544, "rewards/frontier_aurc_reward": -0.003957700170576573, "rewards/frontier_coverage_1": -0.000882378313690424, "rewards/frontier_coverage_10": -0.000882378313690424, "rewards/frontier_coverage_15": -0.00038324356428347527, "rewards/frontier_coverage_20": 0.0018486734246835113, "rewards/frontier_coverage_25": 0.010639767814427614, "rewards/frontier_coverage_5": -0.000882378313690424, "rewards/frontier_ece_reward": 0.00900000799447298, "signal/accuracy_reward/centered_abs_mean": 0.1783745676279068, "signal/accuracy_reward/group_std_mean": 0.23473148345947265, "signal/accuracy_reward/group_zero_std_frac": 0.3388888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0891872838139534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0891872838139534, "signal/advantage_abs_mean": 0.10917116552591324, "signal/advantage_pre_scale_abs_mean": 0.10917116552591324, "signal/advantage_pre_scale_std": 0.17814627587795256, "signal/advantage_std": 0.17814627587795256, "signal/brier_reward/centered_abs_mean": 0.1465170204639435, "signal/brier_reward/group_std_mean": 0.18987120389938356, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018314627557992937, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018314627557992937, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08566285967826844, "signal/confidence_uniqueness_reward/group_std_mean": 0.10883112400770187, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010707857459783554, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010707857459783554, "signal/format_reward/centered_abs_mean": 0.01331922747194767, "signal/format_reward/group_std_mean": 0.028269005939364433, "signal/format_reward/group_zero_std_frac": 0.8722222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006659613735973835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006659613735973835, "signal/frontier_aurc_reward/centered_abs_mean": 0.004681824566796422, "signal/frontier_aurc_reward/group_std_mean": 0.0076571997255086895, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.3804658788722e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.3804658788722e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.07828285247087478, "signal/frontier_coverage_1/group_std_mean": 0.11360109597444534, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014012629631906747, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014012629631906747, "signal/frontier_coverage_10/centered_abs_mean": 0.07828285247087478, "signal/frontier_coverage_10/group_std_mean": 0.11360109597444534, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014012629631906747, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014012629631906747, "signal/frontier_coverage_15/centered_abs_mean": 0.07654114663600922, "signal/frontier_coverage_15/group_std_mean": 0.11135455518960953, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00137008645106107, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00137008645106107, "signal/frontier_coverage_20/centered_abs_mean": 0.06982675939798355, "signal/frontier_coverage_20/group_std_mean": 0.10253714323043824, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001249898923560977, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001249898923560977, "signal/frontier_coverage_25/centered_abs_mean": 0.04172022417187691, "signal/frontier_coverage_25/group_std_mean": 0.06313150227069855, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007467919844202697, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007467919844202697, "signal/frontier_coverage_5/centered_abs_mean": 0.07828285247087478, "signal/frontier_coverage_5/group_std_mean": 0.11360109597444534, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014012629631906747, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014012629631906747, "signal/frontier_ece_reward/centered_abs_mean": 0.015052585303783417, "signal/frontier_ece_reward/group_std_mean": 0.01943938247859478, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018815731629729271, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018815731629729271, "step": 65 }, { "calibration/aurc": 0.3126631474688041, "calibration/batch_distribution_entropy": 0.7277950147979884, "calibration/buffer_distribution_entropy": 0.7063283269975165, "calibration/confidence_entropy": 0.4591715826280735, "calibration/coverage@0%": 0.008421052631578947, "calibration/coverage@1%": 0.008421052631578947, "calibration/coverage@10%": 0.08175629290617849, "calibration/coverage@15%": 0.12386155606407323, "calibration/coverage@20%": 0.1254405034324943, "calibration/coverage@25%": 0.2649911853756027, "calibration/coverage@30%": 0.39158820636490194, "calibration/coverage@5%": 0.05947368421052631, "calibration/ece": 0.16750350495179606, "calibration/mean_confidence": 0.7642698401266471, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009982638888888862, "completions/max_length": 3730.4, "completions/max_terminated_length": 3730.4, "completions/mean_length": 726.0807373046875, "completions/mean_terminated_length": 733.3897216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 192.6, "epoch": 0.16799790002624967, "grad_norm": 0.000522218644618988, "learning_rate": 4.156626506024097e-06, "loss": -0.009, "num_tokens": 138139323.0, "reward": 1.0156872868537903, "reward_std": 0.14232541620731354, "rewards/accuracy_reward": 0.634375, "rewards/brier_reward": 0.7619485020637512, "rewards/confidence_uniqueness_reward": 0.8558455348014832, "rewards/format_reward": 0.9899305582046509, "rewards/frontier_aurc_reward": -0.003244505263864994, "rewards/frontier_coverage_1": 0.0021130547567736356, "rewards/frontier_coverage_10": 0.0021130547567736356, "rewards/frontier_coverage_15": 0.0022873246343806386, "rewards/frontier_coverage_20": 0.0036932858638465405, "rewards/frontier_coverage_25": 0.009348882955964654, "rewards/frontier_coverage_5": 0.0021130547567736356, "rewards/frontier_ece_reward": 0.007843670062720775, "signal/accuracy_reward/centered_abs_mean": 0.1700954854488373, "signal/accuracy_reward/group_std_mean": 0.22446969747543336, "signal/accuracy_reward/group_zero_std_frac": 0.3583333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08504774272441865, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08504774272441865, "signal/advantage_abs_mean": 0.10507383644580841, "signal/advantage_pre_scale_abs_mean": 0.10507383644580841, "signal/advantage_pre_scale_std": 0.17605942785739898, "signal/advantage_std": 0.17605942785739898, "signal/brier_reward/centered_abs_mean": 0.13834567368030548, "signal/brier_reward/group_std_mean": 0.17988546192646027, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017293209210038185, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017293209210038185, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07612362504005432, "signal/confidence_uniqueness_reward/group_std_mean": 0.10020371675491332, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00951545313000679, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00951545313000679, "signal/format_reward/centered_abs_mean": 0.017078992538154127, "signal/format_reward/group_std_mean": 0.0330724623054266, "signal/format_reward/group_zero_std_frac": 0.8611111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008539496269077063, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008539496269077063, "signal/frontier_aurc_reward/centered_abs_mean": 0.0034367543645203113, "signal/frontier_aurc_reward/group_std_mean": 0.0058014895766973495, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.151790075819008e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.151790075819008e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.08826040178537368, "signal/frontier_coverage_1/group_std_mean": 0.12145550101995468, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015798611333593727, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015798611333593727, "signal/frontier_coverage_10/centered_abs_mean": 0.08826040178537368, "signal/frontier_coverage_10/group_std_mean": 0.12145550101995468, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015798611333593727, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015798611333593727, "signal/frontier_coverage_15/centered_abs_mean": 0.08678570687770844, "signal/frontier_coverage_15/group_std_mean": 0.11957939118146896, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015534641221165656, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015534641221165656, "signal/frontier_coverage_20/centered_abs_mean": 0.07486912310123443, "signal/frontier_coverage_20/group_std_mean": 0.10399644821882248, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001340157282538712, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001340157282538712, "signal/frontier_coverage_25/centered_abs_mean": 0.04686204046010971, "signal/frontier_coverage_25/group_std_mean": 0.06673805713653565, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008388305082917214, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008388305082917214, "signal/frontier_coverage_5/centered_abs_mean": 0.08826040178537368, "signal/frontier_coverage_5/group_std_mean": 0.12145550101995468, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015798611333593727, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015798611333593727, "signal/frontier_ece_reward/centered_abs_mean": 0.013867172226309777, "signal/frontier_ece_reward/group_std_mean": 0.01827750392258167, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017333965282887221, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017333965282887221, "step": 70 }, { "calibration/aurc": 0.22829603474016733, "calibration/batch_distribution_entropy": 0.7491460889045519, "calibration/buffer_distribution_entropy": 0.738965155571171, "calibration/confidence_entropy": 0.49153403613387575, "calibration/coverage@0%": 0.020480859458860516, "calibration/coverage@1%": 0.020480859458860516, "calibration/coverage@10%": 0.14642396025942012, "calibration/coverage@15%": 0.3115578189599651, "calibration/coverage@20%": 0.46366534740545295, "calibration/coverage@25%": 0.6064039138082674, "calibration/coverage@30%": 0.6646767810026385, "calibration/coverage@5%": 0.052387027437863144, "calibration/ece": 0.12678730760341095, "calibration/mean_confidence": 0.7407062089742026, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006770833333333326, "completions/max_length": 3199.8, "completions/max_terminated_length": 3199.8, "completions/mean_length": 771.2448852539062, "completions/mean_terminated_length": 776.47412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 252.8, "epoch": 0.17999775002812465, "grad_norm": 0.0006210155552253127, "learning_rate": 4.006024096385543e-06, "loss": -0.0039, "num_tokens": 150088960.0, "reward": 1.0522391557693482, "reward_std": 0.13885852843523025, "rewards/accuracy_reward": 0.6905381917953491, "rewards/brier_reward": 0.7954351902008057, "rewards/confidence_uniqueness_reward": 0.8835989475250244, "rewards/format_reward": 0.9932291626930236, "rewards/frontier_aurc_reward": -0.002661742176860571, "rewards/frontier_coverage_1": -0.009026618162170052, "rewards/frontier_coverage_10": -0.00876101772300899, "rewards/frontier_coverage_15": -0.0059734506183303894, "rewards/frontier_coverage_20": 0.00023110741749405862, "rewards/frontier_coverage_25": 0.014540878124535084, "rewards/frontier_coverage_5": -0.009026618162170052, "rewards/frontier_ece_reward": 0.006770659517496824, "signal/accuracy_reward/centered_abs_mean": 0.17407226264476777, "signal/accuracy_reward/group_std_mean": 0.22825241684913636, "signal/accuracy_reward/group_zero_std_frac": 0.36111111640930177, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08703613132238389, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08703613132238389, "signal/advantage_abs_mean": 0.10250708907842636, "signal/advantage_pre_scale_abs_mean": 0.10250708907842636, "signal/advantage_pre_scale_std": 0.17143681049346923, "signal/advantage_std": 0.17143681049346923, "signal/brier_reward/centered_abs_mean": 0.12454370558261871, "signal/brier_reward/group_std_mean": 0.16498080193996428, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015567963197827338, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015567963197827338, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0648738332092762, "signal/confidence_uniqueness_reward/group_std_mean": 0.0866569384932518, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008109229151159525, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008109229151159525, "signal/format_reward/centered_abs_mean": 0.01250000037252903, "signal/format_reward/group_std_mean": 0.02707981951534748, "signal/format_reward/group_zero_std_frac": 0.8777777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006250000186264515, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006250000186264515, "signal/frontier_aurc_reward/centered_abs_mean": 0.003043852746486664, "signal/frontier_aurc_reward/group_std_mean": 0.005191993620246649, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.448495867312886e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.448495867312886e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.08956246227025985, "signal/frontier_coverage_1/group_std_mean": 0.12367427349090576, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016031680861487985, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016031680861487985, "signal/frontier_coverage_10/centered_abs_mean": 0.08892591893672944, "signal/frontier_coverage_10/group_std_mean": 0.1228408694267273, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015917738899588584, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015917738899588584, "signal/frontier_coverage_15/centered_abs_mean": 0.07821435481309891, "signal/frontier_coverage_15/group_std_mean": 0.10914516896009445, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014000368304550649, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014000368304550649, "signal/frontier_coverage_20/centered_abs_mean": 0.06118913814425468, "signal/frontier_coverage_20/group_std_mean": 0.08704253137111664, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010952855343930423, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010952855343930423, "signal/frontier_coverage_25/centered_abs_mean": 0.04755426123738289, "signal/frontier_coverage_25/group_std_mean": 0.0676010601222515, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008512212429195642, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008512212429195642, "signal/frontier_coverage_5/centered_abs_mean": 0.08956246227025985, "signal/frontier_coverage_5/group_std_mean": 0.12367427349090576, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016031680861487985, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016031680861487985, "signal/frontier_ece_reward/centered_abs_mean": 0.010565318539738655, "signal/frontier_ece_reward/group_std_mean": 0.014126934669911861, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001320664817467332, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001320664817467332, "step": 75 }, { "calibration/aurc": 0.2413408381036839, "calibration/batch_distribution_entropy": 0.7529266293718184, "calibration/buffer_distribution_entropy": 0.7566801887277335, "calibration/confidence_entropy": 0.4488029392924554, "calibration/coverage@0%": 0.0015625, "calibration/coverage@1%": 0.0015625, "calibration/coverage@10%": 0.052083333333333336, "calibration/coverage@15%": 0.39773123909249564, "calibration/coverage@20%": 0.5354166666666667, "calibration/coverage@25%": 0.5969049173194082, "calibration/coverage@30%": 0.8075363264842774, "calibration/coverage@5%": 0.0015625, "calibration/ece": 0.1542229314047629, "calibration/mean_confidence": 0.7623944513982001, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009895833333333348, "completions/max_length": 3758.0, "completions/max_terminated_length": 3758.0, "completions/mean_length": 846.7328125, "completions/mean_terminated_length": 855.2931884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 260.2, "epoch": 0.19199760002999963, "grad_norm": 0.0005486936424858868, "learning_rate": 3.855421686746989e-06, "loss": -0.0076, "num_tokens": 162896602.0, "reward": 1.035358762741089, "reward_std": 0.14456366002559662, "rewards/accuracy_reward": 0.663281238079071, "rewards/brier_reward": 0.7700084805488586, "rewards/confidence_uniqueness_reward": 0.8956380486488342, "rewards/format_reward": 0.9899305462837219, "rewards/frontier_aurc_reward": -0.004593700263649225, "rewards/frontier_coverage_1": -0.0053374451585114, "rewards/frontier_coverage_10": -0.005398740433156491, "rewards/frontier_coverage_15": -0.003581512067466974, "rewards/frontier_coverage_20": 0.0033742699888534844, "rewards/frontier_coverage_25": 0.01686990410089493, "rewards/frontier_coverage_5": -0.0053374451585114, "rewards/frontier_ece_reward": 0.0049498746637254955, "signal/accuracy_reward/centered_abs_mean": 0.178466796875, "signal/accuracy_reward/group_std_mean": 0.2359412580728531, "signal/accuracy_reward/group_zero_std_frac": 0.33055556416511533, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0892333984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0892333984375, "signal/advantage_abs_mean": 0.10778079181909561, "signal/advantage_pre_scale_abs_mean": 0.10778079181909561, "signal/advantage_pre_scale_std": 0.17682308256626128, "signal/advantage_std": 0.17682308256626128, "signal/brier_reward/centered_abs_mean": 0.13626245856285096, "signal/brier_reward/group_std_mean": 0.17717938125133514, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01703280732035637, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01703280732035637, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05800086259841919, "signal/confidence_uniqueness_reward/group_std_mean": 0.07589098066091537, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007250107824802399, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007250107824802399, "signal/format_reward/centered_abs_mean": 0.015288628451526166, "signal/format_reward/group_std_mean": 0.02700880281627178, "signal/format_reward/group_zero_std_frac": 0.8916666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007644314225763083, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007644314225763083, "signal/frontier_aurc_reward/centered_abs_mean": 0.00542384460568428, "signal/frontier_aurc_reward/group_std_mean": 0.008771744929254055, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.708681755000725e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.708681755000725e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.08281062692403793, "signal/frontier_coverage_1/group_std_mean": 0.11733318269252777, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014823101460933684, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014823101460933684, "signal/frontier_coverage_10/centered_abs_mean": 0.08166301399469375, "signal/frontier_coverage_10/group_std_mean": 0.11585556566715241, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014617678243666887, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014617678243666887, "signal/frontier_coverage_15/centered_abs_mean": 0.07230544909834861, "signal/frontier_coverage_15/group_std_mean": 0.10374643951654434, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012942674802616239, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012942674802616239, "signal/frontier_coverage_20/centered_abs_mean": 0.05011899545788765, "signal/frontier_coverage_20/group_std_mean": 0.07308039665222169, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008971299859695137, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008971299859695137, "signal/frontier_coverage_25/centered_abs_mean": 0.045297824591398236, "signal/frontier_coverage_25/group_std_mean": 0.06296739131212234, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008108310401439666, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008108310401439666, "signal/frontier_coverage_5/centered_abs_mean": 0.08281062692403793, "signal/frontier_coverage_5/group_std_mean": 0.11733318269252777, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014823101460933684, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014823101460933684, "signal/frontier_ece_reward/centered_abs_mean": 0.009097871743142605, "signal/frontier_ece_reward/group_std_mean": 0.012163439951837062, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011372339678928256, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011372339678928256, "step": 80 }, { "calibration/aurc": 0.2549322254632649, "calibration/batch_distribution_entropy": 0.7912315705537434, "calibration/buffer_distribution_entropy": 0.7749982822016868, "calibration/confidence_entropy": 0.4605557325249568, "calibration/coverage@0%": 0.0026484929078014184, "calibration/coverage@1%": 0.0026484929078014184, "calibration/coverage@10%": 0.04064321586294653, "calibration/coverage@15%": 0.14667673071165252, "calibration/coverage@20%": 0.2545642286064485, "calibration/coverage@25%": 0.39401181242340777, "calibration/coverage@30%": 0.7916024920164326, "calibration/coverage@5%": 0.0026484929078014184, "calibration/ece": 0.12474794704579159, "calibration/mean_confidence": 0.7407469510867013, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006076388888888906, "completions/max_length": 3656.8, "completions/max_terminated_length": 3656.8, "completions/mean_length": 855.488623046875, "completions/mean_terminated_length": 860.7367919921875, "completions/min_length": 0.0, "completions/min_terminated_length": 319.0, "epoch": 0.2039974500318746, "grad_norm": 0.00048303132643923163, "learning_rate": 3.7048192771084342e-06, "loss": -0.0039, "num_tokens": 175839031.0, "reward": 1.049242663383484, "reward_std": 0.13771760761737822, "rewards/accuracy_reward": 0.6803819417953492, "rewards/brier_reward": 0.7834740161895752, "rewards/confidence_uniqueness_reward": 0.9058789372444153, "rewards/format_reward": 0.993663203716278, "rewards/frontier_aurc_reward": -0.004763692617416382, "rewards/frontier_coverage_1": -0.0024880644166842105, "rewards/frontier_coverage_10": -0.0016338142449967563, "rewards/frontier_coverage_15": 0.00023350361734628678, "rewards/frontier_coverage_20": 0.007665848324541003, "rewards/frontier_coverage_25": 0.025208524614572524, "rewards/frontier_coverage_5": -0.0024880644166842105, "rewards/frontier_ece_reward": 0.0052955283783376215, "signal/accuracy_reward/centered_abs_mean": 0.16981336772441863, "signal/accuracy_reward/group_std_mean": 0.22479016780853273, "signal/accuracy_reward/group_zero_std_frac": 0.3583333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08490668386220931, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08490668386220931, "signal/advantage_abs_mean": 0.0999684289097786, "signal/advantage_pre_scale_abs_mean": 0.0999684289097786, "signal/advantage_pre_scale_std": 0.17038570940494538, "signal/advantage_std": 0.17038570940494538, "signal/brier_reward/centered_abs_mean": 0.12644084244966508, "signal/brier_reward/group_std_mean": 0.16933887600898742, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015805105306208135, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015805105306208135, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.053865256160497664, "signal/confidence_uniqueness_reward/group_std_mean": 0.0750114805996418, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006733157020062208, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006733157020062208, "signal/format_reward/centered_abs_mean": 0.011767578125, "signal/format_reward/group_std_mean": 0.02580878436565399, "signal/format_reward/group_zero_std_frac": 0.8833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0058837890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0058837890625, "signal/frontier_aurc_reward/centered_abs_mean": 0.005690837278962135, "signal/frontier_aurc_reward/group_std_mean": 0.00979206943884492, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00010186598519794643, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00010186598519794643, "signal/frontier_coverage_1/centered_abs_mean": 0.07988283634185792, "signal/frontier_coverage_1/group_std_mean": 0.11243547201156616, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014299027621746062, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014299027621746062, "signal/frontier_coverage_10/centered_abs_mean": 0.07791899591684341, "signal/frontier_coverage_10/group_std_mean": 0.1098724588751793, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013947500381618738, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013947500381618738, "signal/frontier_coverage_15/centered_abs_mean": 0.07281370237469673, "signal/frontier_coverage_15/group_std_mean": 0.10328521132469178, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001303365221247077, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001303365221247077, "signal/frontier_coverage_20/centered_abs_mean": 0.050810272246599196, "signal/frontier_coverage_20/group_std_mean": 0.07330892160534859, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009095038287341595, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009095038287341595, "signal/frontier_coverage_25/centered_abs_mean": 0.04489777684211731, "signal/frontier_coverage_25/group_std_mean": 0.06135682612657547, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008036701823584735, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008036701823584735, "signal/frontier_coverage_5/centered_abs_mean": 0.07988283634185792, "signal/frontier_coverage_5/group_std_mean": 0.11243547201156616, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014299027621746062, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014299027621746062, "signal/frontier_ece_reward/centered_abs_mean": 0.00815775478258729, "signal/frontier_ece_reward/group_std_mean": 0.010810710676014423, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010197193478234112, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010197193478234112, "step": 85 }, { "calibration/aurc": 0.1884037021642985, "calibration/batch_distribution_entropy": 0.8032843437338201, "calibration/buffer_distribution_entropy": 0.7924773042956995, "calibration/confidence_entropy": 0.5132141674135235, "calibration/coverage@0%": 0.00209705428128203, "calibration/coverage@1%": 0.00209705428128203, "calibration/coverage@10%": 0.13159094607883876, "calibration/coverage@15%": 0.45068144103455543, "calibration/coverage@20%": 0.6938492505588094, "calibration/coverage@25%": 0.8097932670542966, "calibration/coverage@30%": 0.9129219364513481, "calibration/coverage@5%": 0.0453262209479487, "calibration/ece": 0.07574494386976773, "calibration/mean_confidence": 0.7050964063928427, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009375, "completions/max_length": 3446.2, "completions/max_terminated_length": 3446.2, "completions/mean_length": 814.8932250976562, "completions/mean_terminated_length": 822.6298583984375, "completions/min_length": 0.0, "completions/min_terminated_length": 268.6, "epoch": 0.2159973000337496, "grad_norm": 0.0005044998251833022, "learning_rate": 3.5542168674698798e-06, "loss": -0.0078, "num_tokens": 188295273.0, "reward": 1.048031497001648, "reward_std": 0.13147322535514833, "rewards/accuracy_reward": 0.6767361044883728, "rewards/brier_reward": 0.7857626795768737, "rewards/confidence_uniqueness_reward": 0.9275232076644897, "rewards/format_reward": 0.990625, "rewards/frontier_aurc_reward": -0.003035349538549781, "rewards/frontier_coverage_1": -0.009896452794782818, "rewards/frontier_coverage_10": -0.00977154376450926, "rewards/frontier_coverage_15": -0.008140944095794111, "rewards/frontier_coverage_20": 0.0010742006823420524, "rewards/frontier_coverage_25": 0.024561950564384462, "rewards/frontier_coverage_5": -0.009896452794782818, "rewards/frontier_ece_reward": 0.0036846227245405315, "signal/accuracy_reward/centered_abs_mean": 0.1624348998069763, "signal/accuracy_reward/group_std_mean": 0.2136551856994629, "signal/accuracy_reward/group_zero_std_frac": 0.39166666865348815, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08121744990348816, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08121744990348816, "signal/advantage_abs_mean": 0.09799883216619491, "signal/advantage_pre_scale_abs_mean": 0.09799883216619491, "signal/advantage_pre_scale_std": 0.16718345284461975, "signal/advantage_std": 0.16718345284461975, "signal/brier_reward/centered_abs_mean": 0.1226424291729927, "signal/brier_reward/group_std_mean": 0.16106078028678894, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015330303646624088, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015330303646624088, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.040507809817790986, "signal/confidence_uniqueness_reward/group_std_mean": 0.05724046900868416, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005063476227223873, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005063476227223873, "signal/format_reward/centered_abs_mean": 0.01504991315305233, "signal/format_reward/group_std_mean": 0.026866191625595094, "signal/format_reward/group_zero_std_frac": 0.8944444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007524956576526165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007524956576526165, "signal/frontier_aurc_reward/centered_abs_mean": 0.003690991410985589, "signal/frontier_aurc_reward/group_std_mean": 0.00686999736353755, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.606874376302585e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.606874376302585e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.09332177340984345, "signal/frontier_coverage_1/group_std_mean": 0.12926909923553467, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016704596579074859, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016704596579074859, "signal/frontier_coverage_10/centered_abs_mean": 0.09242226481437683, "signal/frontier_coverage_10/group_std_mean": 0.12810271680355073, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001654358464293182, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001654358464293182, "signal/frontier_coverage_15/centered_abs_mean": 0.08840162605047226, "signal/frontier_coverage_15/group_std_mean": 0.12299361377954483, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00158238906878978, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00158238906878978, "signal/frontier_coverage_20/centered_abs_mean": 0.058317091315984726, "signal/frontier_coverage_20/group_std_mean": 0.08401793241500854, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010438758763484657, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010438758763484657, "signal/frontier_coverage_25/centered_abs_mean": 0.04199025183916092, "signal/frontier_coverage_25/group_std_mean": 0.05837389156222343, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007516254670917987, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007516254670917987, "signal/frontier_coverage_5/centered_abs_mean": 0.09332177340984345, "signal/frontier_coverage_5/group_std_mean": 0.12926909923553467, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016704596579074859, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016704596579074859, "signal/frontier_ece_reward/centered_abs_mean": 0.007873425912111997, "signal/frontier_ece_reward/group_std_mean": 0.01113816760480404, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009841782390139996, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009841782390139996, "step": 90 }, { "calibration/aurc": 0.2511498546221036, "calibration/batch_distribution_entropy": 0.8472823428192587, "calibration/buffer_distribution_entropy": 0.8292602035451393, "calibration/confidence_entropy": 0.5268932324887918, "calibration/coverage@0%": 0.007878203361807659, "calibration/coverage@1%": 0.007878203361807659, "calibration/coverage@10%": 0.20039129236704323, "calibration/coverage@15%": 0.38163405896941305, "calibration/coverage@20%": 0.5147368421052632, "calibration/coverage@25%": 0.5347368421052632, "calibration/coverage@30%": 0.5657894736842105, "calibration/coverage@5%": 0.023141361256544504, "calibration/ece": 0.1191731328965677, "calibration/mean_confidence": 0.673483074709598, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0039062500000000226, "completions/max_length": 3117.4, "completions/max_terminated_length": 3117.4, "completions/mean_length": 822.5976684570312, "completions/mean_terminated_length": 825.820654296875, "completions/min_length": 0.0, "completions/min_terminated_length": 249.4, "epoch": 0.22799715003562457, "grad_norm": 0.0005109178018756211, "learning_rate": 3.4036144578313257e-06, "loss": -0.0023, "num_tokens": 200863278.0, "reward": 1.0485934257507323, "reward_std": 0.12660266309976578, "rewards/accuracy_reward": 0.668749988079071, "rewards/brier_reward": 0.7837409973144531, "rewards/confidence_uniqueness_reward": 0.9414145112037658, "rewards/format_reward": 0.9959201335906982, "rewards/frontier_aurc_reward": -0.003332670731469989, "rewards/frontier_coverage_1": -0.00962460646405816, "rewards/frontier_coverage_10": -0.009027575980871916, "rewards/frontier_coverage_15": -0.0062865779735147955, "rewards/frontier_coverage_20": 0.006169534660875798, "rewards/frontier_coverage_25": 0.049424213171005246, "rewards/frontier_coverage_5": -0.00962460646405816, "rewards/frontier_ece_reward": 0.0023768938961438836, "signal/accuracy_reward/centered_abs_mean": 0.16069878339767457, "signal/accuracy_reward/group_std_mean": 0.2120576322078705, "signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08034939169883729, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08034939169883729, "signal/advantage_abs_mean": 0.09407227784395218, "signal/advantage_pre_scale_abs_mean": 0.09407227784395218, "signal/advantage_pre_scale_std": 0.1572835475206375, "signal/advantage_std": 0.1572835475206375, "signal/brier_reward/centered_abs_mean": 0.12441224157810211, "signal/brier_reward/group_std_mean": 0.16177276968955995, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015551530197262764, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015551530197262764, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.031784088164567945, "signal/confidence_uniqueness_reward/group_std_mean": 0.04672372043132782, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003973011020570993, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003973011020570993, "signal/format_reward/centered_abs_mean": 0.007590060774236918, "signal/format_reward/group_std_mean": 0.01747054308652878, "signal/format_reward/group_zero_std_frac": 0.9166666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003795030387118459, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003795030387118459, "signal/frontier_aurc_reward/centered_abs_mean": 0.004074021661654115, "signal/frontier_aurc_reward/group_std_mean": 0.007213028613477945, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.29249841242563e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.29249841242563e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.11047781854867936, "signal/frontier_coverage_1/group_std_mean": 0.1514023333787918, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001977552776224911, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001977552776224911, "signal/frontier_coverage_10/centered_abs_mean": 0.10917936712503433, "signal/frontier_coverage_10/group_std_mean": 0.14975302815437316, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001954310527071357, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001954310527071357, "signal/frontier_coverage_15/centered_abs_mean": 0.10132132470607758, "signal/frontier_coverage_15/group_std_mean": 0.13981849551200867, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018136516213417054, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018136516213417054, "signal/frontier_coverage_20/centered_abs_mean": 0.06076301485300064, "signal/frontier_coverage_20/group_std_mean": 0.08724861890077591, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010876578977331518, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010876578977331518, "signal/frontier_coverage_25/centered_abs_mean": 0.06519225090742112, "signal/frontier_coverage_25/group_std_mean": 0.08802225440740585, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011669412604533135, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011669412604533135, "signal/frontier_coverage_5/centered_abs_mean": 0.11047781854867936, "signal/frontier_coverage_5/group_std_mean": 0.1514023333787918, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001977552776224911, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001977552776224911, "signal/frontier_ece_reward/centered_abs_mean": 0.009134245105087757, "signal/frontier_ece_reward/group_std_mean": 0.01337057575583458, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011417806381359696, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011417806381359696, "step": 95 }, { "calibration/aurc": 0.20278364231917356, "calibration/batch_distribution_entropy": 0.8280205913017324, "calibration/buffer_distribution_entropy": 0.8403697455023262, "calibration/confidence_entropy": 0.5017551241839072, "calibration/coverage@0%": 0.004732741740615757, "calibration/coverage@1%": 0.004732741740615757, "calibration/coverage@10%": 0.15281197352277737, "calibration/coverage@15%": 0.4388613860856143, "calibration/coverage@20%": 0.591331061916468, "calibration/coverage@25%": 0.7446475195822455, "calibration/coverage@30%": 0.8691493747423389, "calibration/coverage@5%": 0.02478023580385785, "calibration/ece": 0.1056119338962247, "calibration/mean_confidence": 0.7101517376892353, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006076388888888884, "completions/max_length": 3497.0, "completions/max_terminated_length": 3497.0, "completions/mean_length": 854.01025390625, "completions/mean_terminated_length": 859.19150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 278.8, "epoch": 0.23999700003749952, "grad_norm": 0.0004937337362207472, "learning_rate": 3.2530120481927713e-06, "loss": -0.0045, "num_tokens": 213800548.0, "reward": 1.0567084312438966, "reward_std": 0.1290334552526474, "rewards/accuracy_reward": 0.6779513835906983, "rewards/brier_reward": 0.8075439810752869, "rewards/confidence_uniqueness_reward": 0.9373886227607727, "rewards/format_reward": 0.993663203716278, "rewards/frontier_aurc_reward": -0.0039381059817969796, "rewards/frontier_coverage_1": 0.012442300096154213, "rewards/frontier_coverage_10": 0.013322338275611401, "rewards/frontier_coverage_15": 0.015895536914467812, "rewards/frontier_coverage_20": 0.022733899392187597, "rewards/frontier_coverage_25": 0.05824657455086708, "rewards/frontier_coverage_5": 0.012462735641747713, "rewards/frontier_ece_reward": 0.0034934583585709334, "signal/accuracy_reward/centered_abs_mean": 0.16227213740348817, "signal/accuracy_reward/group_std_mean": 0.21252356767654418, "signal/accuracy_reward/group_zero_std_frac": 0.397222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08113606870174409, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08113606870174409, "signal/advantage_abs_mean": 0.09522689133882523, "signal/advantage_pre_scale_abs_mean": 0.09522689133882523, "signal/advantage_pre_scale_std": 0.16290957629680633, "signal/advantage_std": 0.16290957629680633, "signal/brier_reward/centered_abs_mean": 0.11964384317398072, "signal/brier_reward/group_std_mean": 0.15935627818107606, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01495548039674759, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01495548039674759, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03456774652004242, "signal/confidence_uniqueness_reward/group_std_mean": 0.052286341041326526, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004320968315005303, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004320968315005303, "signal/format_reward/centered_abs_mean": 0.011051432322710752, "signal/format_reward/group_std_mean": 0.024242669716477393, "signal/format_reward/group_zero_std_frac": 0.8861111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005525716161355376, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005525716161355376, "signal/frontier_aurc_reward/centered_abs_mean": 0.005287631414830685, "signal/frontier_aurc_reward/group_std_mean": 0.009730769135057927, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.464860195294023e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.464860195294023e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1009969249367714, "signal/frontier_coverage_1/group_std_mean": 0.13951509296894074, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018078448716551065, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018078448716551065, "signal/frontier_coverage_10/centered_abs_mean": 0.09884380400180817, "signal/frontier_coverage_10/group_std_mean": 0.136749067902565, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017693039961159229, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017693039961159229, "signal/frontier_coverage_15/centered_abs_mean": 0.0821550577878952, "signal/frontier_coverage_15/group_std_mean": 0.11540477871894836, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014705754816532134, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014705754816532134, "signal/frontier_coverage_20/centered_abs_mean": 0.05389119237661362, "signal/frontier_coverage_20/group_std_mean": 0.07688918858766555, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009646523278206587, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009646523278206587, "signal/frontier_coverage_25/centered_abs_mean": 0.057782044261693956, "signal/frontier_coverage_25/group_std_mean": 0.07765513509511948, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010342985624447465, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010342985624447465, "signal/frontier_coverage_5/centered_abs_mean": 0.10051819980144501, "signal/frontier_coverage_5/group_std_mean": 0.13889843970537186, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017992756562307476, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017992756562307476, "signal/frontier_ece_reward/centered_abs_mean": 0.006692274007946253, "signal/frontier_ece_reward/group_std_mean": 0.009778609126806259, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008365342509932816, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008365342509932816, "step": 100 }, { "epoch": 0.23999700003749952, "eval_calibration/aurc": 0.19508682084988796, "eval_calibration/batch_distribution_entropy": 0.7643132158883287, "eval_calibration/buffer_distribution_entropy": 0.8500895559339968, "eval_calibration/confidence_entropy": 0.4922978578588346, "eval_calibration/coverage@0%": 0.16515456989247312, "eval_calibration/coverage@1%": 0.16515456989247312, "eval_calibration/coverage@10%": 0.24579973118279572, "eval_calibration/coverage@15%": 0.45060483870967744, "eval_calibration/coverage@20%": 0.6233198924731183, "eval_calibration/coverage@25%": 0.8323252688172044, "eval_calibration/coverage@30%": 0.9322916666666666, "eval_calibration/coverage@5%": 0.18128360215053763, "eval_calibration/ece": 0.13874366599462365, "eval_calibration/mean_confidence": 0.7162100638440861, "eval_completions/clipped_ratio": 0.006076388888888895, "eval_completions/max_length": 2862.6666666666665, "eval_completions/max_terminated_length": 2862.6666666666665, "eval_completions/mean_length": 856.265635172526, "eval_completions/mean_terminated_length": 861.4902547200521, "eval_completions/min_length": 122.5, "eval_completions/min_terminated_length": 338.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 213800548.0, "eval_reward": 1.040560742219289, "eval_reward_std": 0.26353143403927487, "eval_rewards/accuracy_reward": 0.667534718910853, "eval_rewards/brier_reward": 0.7879191040992737, "eval_rewards/confidence_uniqueness_reward": 0.8808565934499105, "eval_rewards/format_reward": 0.9930555621782938, "eval_rewards/frontier_aurc_reward": -0.004968842452702423, "eval_rewards/frontier_coverage_1": 0.004989876101414363, "eval_rewards/frontier_coverage_10": 0.00570684849905471, "eval_rewards/frontier_coverage_15": 0.007359214670335253, "eval_rewards/frontier_coverage_20": 0.015881775800759595, "eval_rewards/frontier_coverage_25": 0.04367877449840307, "eval_rewards/frontier_coverage_5": 0.004989876101414363, "eval_rewards/frontier_ece_reward": 0.002231398791385194, "eval_runtime": 189.412, "eval_samples_per_second": 5.279, "eval_signal/accuracy_reward/centered_abs_mean": 0.4254014740387599, "eval_signal/accuracy_reward/group_std_mean": 0.46698982020219165, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21270073701937994, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21270073701937994, "eval_signal/advantage_abs_mean": 0.23313634594281515, "eval_signal/advantage_pre_scale_abs_mean": 0.23313634594281515, "eval_signal/advantage_pre_scale_std": 0.26145924379428226, "eval_signal/advantage_std": 0.26145924379428226, "eval_signal/brier_reward/centered_abs_mean": 0.20662419497966766, "eval_signal/brier_reward/group_std_mean": 0.2611635724703471, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025828024372458458, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.025828024372458458, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05225155937174956, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07714233547449112, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006531444921468695, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006531444921468695, "eval_signal/format_reward/centered_abs_mean": 0.013346354166666666, "eval_signal/format_reward/group_std_mean": 0.03629430073002974, "eval_signal/format_reward/group_zero_std_frac": 0.8055555721124014, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.006673177083333333, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.006673177083333333, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.008770209504291415, "eval_signal/frontier_aurc_reward/group_std_mean": 0.017012828961014748, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00015698675148693533, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00015698675148693533, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.11700086171428363, "eval_signal/frontier_coverage_1/group_std_mean": 0.19166247049967447, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002094315461969624, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002094315461969624, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.11191717411080997, "eval_signal/frontier_coverage_10/group_std_mean": 0.18493242065111795, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020033172719801464, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020033172719801464, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.08915293340881665, "eval_signal/frontier_coverage_15/group_std_mean": 0.15359986076752344, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015958373939308028, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015958373939308028, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.05830358279248079, "eval_signal/frontier_coverage_20/group_std_mean": 0.09689446166157722, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010436340235173702, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010436340235173702, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.08435119688510895, "eval_signal/frontier_coverage_25/group_std_mean": 0.10973832756280899, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015098864290242393, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015098864290242393, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.11700086171428363, "eval_signal/frontier_coverage_5/group_std_mean": 0.19166247049967447, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002094315461969624, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002094315461969624, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.006453142423803608, "eval_signal/frontier_ece_reward/group_std_mean": 0.010442674780885378, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000806642802975451, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000806642802975451, "eval_steps_per_second": 0.032, "step": 100 }, { "calibration/aurc": 0.3511401691340529, "calibration/batch_distribution_entropy": 0.757278262607608, "calibration/buffer_distribution_entropy": 0.8430223025636077, "calibration/confidence_entropy": 0.4727277883725902, "calibration/coverage@0%": 0.0010416666666666667, "calibration/coverage@1%": 0.0010416666666666667, "calibration/coverage@10%": 0.10262061403508774, "calibration/coverage@15%": 0.13788377192982457, "calibration/coverage@20%": 0.19810855263157895, "calibration/coverage@25%": 0.3067105263157895, "calibration/coverage@30%": 0.3810792349726776, "calibration/coverage@5%": 0.033146929824561404, "calibration/ece": 0.18226900464138213, "calibration/mean_confidence": 0.7595750235377008, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010156249999999978, "completions/max_length": 3543.0, "completions/max_terminated_length": 3543.0, "completions/mean_length": 862.9362915039062, "completions/mean_terminated_length": 871.67646484375, "completions/min_length": 0.0, "completions/min_terminated_length": 279.2, "epoch": 0.2519968500393745, "grad_norm": 0.0005761328502558172, "learning_rate": 3.1024096385542172e-06, "loss": -0.0081, "num_tokens": 226818438.0, "reward": 1.048232102394104, "reward_std": 0.1332755818963051, "rewards/accuracy_reward": 0.6735243082046509, "rewards/brier_reward": 0.7883717060089112, "rewards/confidence_uniqueness_reward": 0.9287825942039489, "rewards/format_reward": 0.9895833253860473, "rewards/frontier_aurc_reward": -0.00578044205904007, "rewards/frontier_coverage_1": 0.007354969310108572, "rewards/frontier_coverage_10": 0.007265249360352754, "rewards/frontier_coverage_15": 0.009339299611747265, "rewards/frontier_coverage_20": 0.018504777178168296, "rewards/frontier_coverage_25": 0.051360327005386355, "rewards/frontier_coverage_5": 0.007354969310108572, "rewards/frontier_ece_reward": 0.0026106106583029033, "signal/accuracy_reward/centered_abs_mean": 0.15887044221162797, "signal/accuracy_reward/group_std_mean": 0.21121549904346465, "signal/accuracy_reward/group_zero_std_frac": 0.397222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07943522110581398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07943522110581398, "signal/advantage_abs_mean": 0.09754386842250824, "signal/advantage_pre_scale_abs_mean": 0.09754386842250824, "signal/advantage_pre_scale_std": 0.16947126388549805, "signal/advantage_std": 0.16947126388549805, "signal/brier_reward/centered_abs_mean": 0.1252099484205246, "signal/brier_reward/group_std_mean": 0.16566490530967712, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015651243552565575, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015651243552565575, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04011865481734276, "signal/confidence_uniqueness_reward/group_std_mean": 0.057522188127040866, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005014831852167845, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005014831852167845, "signal/format_reward/centered_abs_mean": 0.016981336660683154, "signal/format_reward/group_std_mean": 0.029765255004167556, "signal/format_reward/group_zero_std_frac": 0.8861111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008490668330341577, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008490668330341577, "signal/frontier_aurc_reward/centered_abs_mean": 0.007304486818611622, "signal/frontier_aurc_reward/group_std_mean": 0.013138260692358017, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00013075030146865175, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00013075030146865175, "signal/frontier_coverage_1/centered_abs_mean": 0.0824627086520195, "signal/frontier_coverage_1/group_std_mean": 0.1177333876490593, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014760824386030435, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014760824386030435, "signal/frontier_coverage_10/centered_abs_mean": 0.08060138821601867, "signal/frontier_coverage_10/group_std_mean": 0.11523205190896987, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014427647460252047, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014427647460252047, "signal/frontier_coverage_15/centered_abs_mean": 0.07278760075569153, "signal/frontier_coverage_15/group_std_mean": 0.10448294579982757, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013028981164097786, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013028981164097786, "signal/frontier_coverage_20/centered_abs_mean": 0.04164608493447304, "signal/frontier_coverage_20/group_std_mean": 0.060693875700235364, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007454649079591036, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007454649079591036, "signal/frontier_coverage_25/centered_abs_mean": 0.05113328471779823, "signal/frontier_coverage_25/group_std_mean": 0.06808223649859428, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009152857935987412, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009152857935987412, "signal/frontier_coverage_5/centered_abs_mean": 0.0824627086520195, "signal/frontier_coverage_5/group_std_mean": 0.1177333876490593, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014760824386030435, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014760824386030435, "signal/frontier_ece_reward/centered_abs_mean": 0.004956904333084822, "signal/frontier_ece_reward/group_std_mean": 0.006825331319123507, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006196130416356027, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006196130416356027, "step": 105 }, { "calibration/aurc": 0.20816277950066214, "calibration/batch_distribution_entropy": 0.7404046559516211, "calibration/buffer_distribution_entropy": 0.8073233848731809, "calibration/confidence_entropy": 0.4333681136935855, "calibration/coverage@0%": 0.0010416666666666667, "calibration/coverage@1%": 0.0010416666666666667, "calibration/coverage@10%": 0.16827268276762403, "calibration/coverage@15%": 0.3403001671154977, "calibration/coverage@20%": 0.48926075741938135, "calibration/coverage@25%": 0.6757180156657964, "calibration/coverage@30%": 0.8710182767624021, "calibration/coverage@5%": 0.03125, "calibration/ece": 0.1267235183366333, "calibration/mean_confidence": 0.7824960816740748, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008506944444444465, "completions/max_length": 3628.6, "completions/max_terminated_length": 3628.6, "completions/mean_length": 902.2748291015625, "completions/mean_terminated_length": 910.0505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 308.2, "epoch": 0.2639967000412495, "grad_norm": 0.0005406757700257003, "learning_rate": 2.9518072289156627e-06, "loss": -0.0053, "num_tokens": 240321092.0, "reward": 1.068948006629944, "reward_std": 0.13193423300981522, "rewards/accuracy_reward": 0.7086805462837219, "rewards/brier_reward": 0.8089686393737793, "rewards/confidence_uniqueness_reward": 0.9270894527435303, "rewards/format_reward": 0.9913194417953491, "rewards/frontier_aurc_reward": -0.004053758783265948, "rewards/frontier_coverage_1": 0.004211192601360381, "rewards/frontier_coverage_10": 0.004211192601360381, "rewards/frontier_coverage_15": 0.0068603390827775, "rewards/frontier_coverage_20": 0.0205037584528327, "rewards/frontier_coverage_25": 0.050310605019330976, "rewards/frontier_coverage_5": 0.004211192601360381, "rewards/frontier_ece_reward": 0.0031742799561470746, "signal/accuracy_reward/centered_abs_mean": 0.15706380307674409, "signal/accuracy_reward/group_std_mean": 0.2148415267467499, "signal/accuracy_reward/group_zero_std_frac": 0.36111111044883726, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07853190153837204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07853190153837204, "signal/advantage_abs_mean": 0.09467502385377884, "signal/advantage_pre_scale_abs_mean": 0.09467502385377884, "signal/advantage_pre_scale_std": 0.16729762852191926, "signal/advantage_std": 0.16729762852191926, "signal/brier_reward/centered_abs_mean": 0.1214766725897789, "signal/brier_reward/group_std_mean": 0.16396571099758148, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015184584073722363, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015184584073722363, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.038360346108675, "signal/confidence_uniqueness_reward/group_std_mean": 0.05404561161994934, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004795043263584375, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004795043263584375, "signal/format_reward/centered_abs_mean": 0.01239149309694767, "signal/format_reward/group_std_mean": 0.023483334854245187, "signal/format_reward/group_zero_std_frac": 0.9000000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006195746548473835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006195746548473835, "signal/frontier_aurc_reward/centered_abs_mean": 0.005569100752472878, "signal/frontier_aurc_reward/group_std_mean": 0.010228174738585949, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.968689846573398e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.968689846573398e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.07596084028482437, "signal/frontier_coverage_1/group_std_mean": 0.1094392940402031, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013596989447250962, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013596989447250962, "signal/frontier_coverage_10/centered_abs_mean": 0.07596084028482437, "signal/frontier_coverage_10/group_std_mean": 0.1094392940402031, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013596989447250962, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013596989447250962, "signal/frontier_coverage_15/centered_abs_mean": 0.0660040944814682, "signal/frontier_coverage_15/group_std_mean": 0.09634122103452683, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011814731871709228, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011814731871709228, "signal/frontier_coverage_20/centered_abs_mean": 0.03823840469121933, "signal/frontier_coverage_20/group_std_mean": 0.055505610257387164, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006844674120657146, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006844674120657146, "signal/frontier_coverage_25/centered_abs_mean": 0.04834548756480217, "signal/frontier_coverage_25/group_std_mean": 0.06364585980772972, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008653842494823038, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008653842494823038, "signal/frontier_coverage_5/centered_abs_mean": 0.07596084028482437, "signal/frontier_coverage_5/group_std_mean": 0.1094392940402031, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013596989447250962, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013596989447250962, "signal/frontier_ece_reward/centered_abs_mean": 0.004714849684387445, "signal/frontier_ece_reward/group_std_mean": 0.006203057337552309, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005893562105484307, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005893562105484307, "step": 110 }, { "calibration/aurc": 0.3535007824326083, "calibration/batch_distribution_entropy": 0.7879573423744334, "calibration/buffer_distribution_entropy": 0.7750194996530027, "calibration/confidence_entropy": 0.46526847791535486, "calibration/coverage@0%": 0.003172045997444586, "calibration/coverage@1%": 0.003172045997444586, "calibration/coverage@10%": 0.003172045997444586, "calibration/coverage@15%": 0.056895450252763734, "calibration/coverage@20%": 0.15955502472084884, "calibration/coverage@25%": 0.2010443864229765, "calibration/coverage@30%": 0.4401978255764156, "calibration/coverage@5%": 0.003172045997444586, "calibration/ece": 0.22863723387297646, "calibration/mean_confidence": 0.7457530588732254, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666675, "completions/max_length": 3994.0, "completions/max_terminated_length": 3994.0, "completions/mean_length": 954.9671020507812, "completions/mean_terminated_length": 965.1707153320312, "completions/min_length": 0.0, "completions/min_terminated_length": 328.0, "epoch": 0.27599655004312446, "grad_norm": 0.0004946527187712491, "learning_rate": 2.8012048192771087e-06, "loss": -0.0074, "num_tokens": 254401513.0, "reward": 1.04098482131958, "reward_std": 0.13563843965530395, "rewards/accuracy_reward": 0.6626736164093018, "rewards/brier_reward": 0.7801254391670227, "rewards/confidence_uniqueness_reward": 0.924898338317871, "rewards/format_reward": 0.9892361044883728, "rewards/frontier_aurc_reward": -0.004975694324821233, "rewards/frontier_coverage_1": 0.00669761549361283, "rewards/frontier_coverage_10": 0.00669761549361283, "rewards/frontier_coverage_15": 0.0074621538631618025, "rewards/frontier_coverage_20": 0.016340048145502807, "rewards/frontier_coverage_25": 0.046477542445063594, "rewards/frontier_coverage_5": 0.00669761549361283, "rewards/frontier_ece_reward": 0.002986938552930951, "signal/accuracy_reward/centered_abs_mean": 0.16362847089767457, "signal/accuracy_reward/group_std_mean": 0.2102369487285614, "signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08181423544883729, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08181423544883729, "signal/advantage_abs_mean": 0.10116954147815704, "signal/advantage_pre_scale_abs_mean": 0.10116954147815704, "signal/advantage_pre_scale_std": 0.1757548063993454, "signal/advantage_std": 0.1757548063993454, "signal/brier_reward/centered_abs_mean": 0.13105646818876265, "signal/brier_reward/group_std_mean": 0.1718491792678833, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01638205852359533, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01638205852359533, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04196493178606033, "signal/confidence_uniqueness_reward/group_std_mean": 0.06228391453623772, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0052456164732575415, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0052456164732575415, "signal/format_reward/centered_abs_mean": 0.016406250139698385, "signal/format_reward/group_std_mean": 0.0324904628098011, "signal/format_reward/group_zero_std_frac": 0.8611111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008203125069849193, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008203125069849193, "signal/frontier_aurc_reward/centered_abs_mean": 0.006411750707775355, "signal/frontier_aurc_reward/group_std_mean": 0.011484375223517418, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0001147703340393491, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0001147703340393491, "signal/frontier_coverage_1/centered_abs_mean": 0.07490146160125732, "signal/frontier_coverage_1/group_std_mean": 0.1070878341794014, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013407361460849644, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013407361460849644, "signal/frontier_coverage_10/centered_abs_mean": 0.07490146160125732, "signal/frontier_coverage_10/group_std_mean": 0.1070878341794014, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013407361460849644, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013407361460849644, "signal/frontier_coverage_15/centered_abs_mean": 0.06685705333948136, "signal/frontier_coverage_15/group_std_mean": 0.09658489525318145, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011967412428930402, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011967412428930402, "signal/frontier_coverage_20/centered_abs_mean": 0.040889284759759906, "signal/frontier_coverage_20/group_std_mean": 0.05778271481394768, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000731918157543987, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000731918157543987, "signal/frontier_coverage_25/centered_abs_mean": 0.055317191779613493, "signal/frontier_coverage_25/group_std_mean": 0.07240601480007172, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009901776560582221, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009901776560582221, "signal/frontier_coverage_5/centered_abs_mean": 0.07490146160125732, "signal/frontier_coverage_5/group_std_mean": 0.1070878341794014, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013407361460849644, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013407361460849644, "signal/frontier_ece_reward/centered_abs_mean": 0.005480154789984227, "signal/frontier_ece_reward/group_std_mean": 0.007094941008836031, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006850193487480283, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006850193487480283, "step": 115 }, { "calibration/aurc": 0.27590543776197624, "calibration/batch_distribution_entropy": 0.7837476364249507, "calibration/buffer_distribution_entropy": 0.7804702805505999, "calibration/confidence_entropy": 0.4697839469577409, "calibration/coverage@0%": 0.026063424717145345, "calibration/coverage@1%": 0.026063424717145345, "calibration/coverage@10%": 0.06167007814052618, "calibration/coverage@15%": 0.14026525333566334, "calibration/coverage@20%": 0.47490888816362054, "calibration/coverage@25%": 0.5881105852915579, "calibration/coverage@30%": 0.6391998476936467, "calibration/coverage@5%": 0.03283425805047868, "calibration/ece": 0.1501559543471629, "calibration/mean_confidence": 0.7493281618142615, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00737847222222221, "completions/max_length": 3241.2, "completions/max_terminated_length": 3241.2, "completions/mean_length": 932.678564453125, "completions/mean_terminated_length": 939.5830688476562, "completions/min_length": 0.0, "completions/min_terminated_length": 352.4, "epoch": 0.28799640004499943, "grad_norm": 0.00040981321944855154, "learning_rate": 2.6506024096385547e-06, "loss": -0.0061, "num_tokens": 268227826.0, "reward": 1.0591084241867066, "reward_std": 0.13269660919904708, "rewards/accuracy_reward": 0.6907986164093017, "rewards/brier_reward": 0.7958985686302185, "rewards/confidence_uniqueness_reward": 0.9336257338523865, "rewards/format_reward": 0.9923611044883728, "rewards/frontier_aurc_reward": -0.003998850425705314, "rewards/frontier_coverage_1": -0.00013376011047512294, "rewards/frontier_coverage_10": -0.00013376011047512294, "rewards/frontier_coverage_15": 0.0030363661935552955, "rewards/frontier_coverage_20": 0.012066485453397035, "rewards/frontier_coverage_25": 0.03752352148294449, "rewards/frontier_coverage_5": -0.00013376011047512294, "rewards/frontier_ece_reward": 0.00379801276139915, "signal/accuracy_reward/centered_abs_mean": 0.1576171875, "signal/accuracy_reward/group_std_mean": 0.20998079478740692, "signal/accuracy_reward/group_zero_std_frac": 0.4, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07880859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07880859375, "signal/advantage_abs_mean": 0.09780252277851105, "signal/advantage_pre_scale_abs_mean": 0.09780252277851105, "signal/advantage_pre_scale_std": 0.1692167788743973, "signal/advantage_std": 0.1692167788743973, "signal/brier_reward/centered_abs_mean": 0.127366141974926, "signal/brier_reward/group_std_mean": 0.16660855412483216, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01592076774686575, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01592076774686575, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0357662245631218, "signal/confidence_uniqueness_reward/group_std_mean": 0.05192890390753746, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004470778070390225, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004470778070390225, "signal/format_reward/centered_abs_mean": 0.01287977434694767, "signal/format_reward/group_std_mean": 0.025085731595754623, "signal/format_reward/group_zero_std_frac": 0.8916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006439887173473835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006439887173473835, "signal/frontier_aurc_reward/centered_abs_mean": 0.00548218721523881, "signal/frontier_aurc_reward/group_std_mean": 0.009825864806771278, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.813114884309471e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.813114884309471e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.07798316925764084, "signal/frontier_coverage_1/group_std_mean": 0.11075976490974426, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013958987081423401, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013958987081423401, "signal/frontier_coverage_10/centered_abs_mean": 0.07798316925764084, "signal/frontier_coverage_10/group_std_mean": 0.11075976490974426, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013958987081423401, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013958987081423401, "signal/frontier_coverage_15/centered_abs_mean": 0.06656168177723884, "signal/frontier_coverage_15/group_std_mean": 0.09562954902648926, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011914541013538838, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011914541013538838, "signal/frontier_coverage_20/centered_abs_mean": 0.04737524390220642, "signal/frontier_coverage_20/group_std_mean": 0.06730483770370484, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008480168529786169, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008480168529786169, "signal/frontier_coverage_25/centered_abs_mean": 0.04951869696378708, "signal/frontier_coverage_25/group_std_mean": 0.06624811142683029, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008863846655003726, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008863846655003726, "signal/frontier_coverage_5/centered_abs_mean": 0.07798316925764084, "signal/frontier_coverage_5/group_std_mean": 0.11075976490974426, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013958987081423401, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013958987081423401, "signal/frontier_ece_reward/centered_abs_mean": 0.0061300666071474556, "signal/frontier_ece_reward/group_std_mean": 0.007925087120383978, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007662583258934319, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007662583258934319, "step": 120 }, { "calibration/aurc": 0.2490672232995655, "calibration/batch_distribution_entropy": 0.7568955546189245, "calibration/buffer_distribution_entropy": 0.7913037149729222, "calibration/confidence_entropy": 0.46519046009015774, "calibration/coverage@0%": 0.0010416666666666667, "calibration/coverage@1%": 0.0010416666666666667, "calibration/coverage@10%": 0.012030677655677656, "calibration/coverage@15%": 0.28572573260073264, "calibration/coverage@20%": 0.39451121794871796, "calibration/coverage@25%": 0.4653273809523809, "calibration/coverage@30%": 0.8012541214768696, "calibration/coverage@5%": 0.0010416666666666667, "calibration/ece": 0.10894207814504417, "calibration/mean_confidence": 0.7644244479942441, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006944444444444442, "completions/max_length": 3704.6, "completions/max_terminated_length": 3704.6, "completions/mean_length": 894.88994140625, "completions/mean_terminated_length": 901.1129150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 334.2, "epoch": 0.2999962500468744, "grad_norm": 0.000503013376146555, "learning_rate": 2.5e-06, "loss": -0.0061, "num_tokens": 281654622.0, "reward": 1.1232542037963866, "reward_std": 0.14578927159309388, "rewards/accuracy_reward": 0.6760416626930237, "rewards/brier_reward": 0.7950587749481202, "rewards/confidence_uniqueness_reward": 0.9364853739738465, "rewards/format_reward": 0.9928819417953492, "rewards/frontier_aurc_reward": 0.27824820578098297, "rewards/frontier_coverage_1": 0.28667475739493964, "rewards/frontier_coverage_10": 0.2890449246915523, "rewards/frontier_coverage_15": 0.2900581806898117, "rewards/frontier_coverage_20": 0.3047246981412172, "rewards/frontier_coverage_25": 0.32476295456290244, "rewards/frontier_coverage_5": 0.2867823286447674, "rewards/frontier_ece_reward": 0.28376022321172056, "signal/accuracy_reward/centered_abs_mean": 0.1623372405767441, "signal/accuracy_reward/group_std_mean": 0.21117228865623475, "signal/accuracy_reward/group_zero_std_frac": 0.4055555522441864, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08116862028837205, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08116862028837205, "signal/advantage_abs_mean": 0.11012209504842758, "signal/advantage_pre_scale_abs_mean": 0.11012209504842758, "signal/advantage_pre_scale_std": 0.1846640706062317, "signal/advantage_std": 0.1846640706062317, "signal/brier_reward/centered_abs_mean": 0.12385518252849578, "signal/brier_reward/group_std_mean": 0.16202466189861298, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015481897816061973, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015481897816061973, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.033407071605324745, "signal/confidence_uniqueness_reward/group_std_mean": 0.04837455451488495, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004175883950665593, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004175883950665593, "signal/format_reward/centered_abs_mean": 0.010980902891606092, "signal/format_reward/group_std_mean": 0.021640064381062984, "signal/format_reward/group_zero_std_frac": 0.9055555582046508, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005490451445803046, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005490451445803046, "signal/frontier_aurc_reward/centered_abs_mean": 0.06701602544635535, "signal/frontier_aurc_reward/group_std_mean": 0.08801630456000567, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0011995868175290526, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0011995868175290526, "signal/frontier_coverage_1/centered_abs_mean": 0.11247340589761734, "signal/frontier_coverage_1/group_std_mean": 0.14900651276111604, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002013273839838803, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002013273839838803, "signal/frontier_coverage_10/centered_abs_mean": 0.10776370391249657, "signal/frontier_coverage_10/group_std_mean": 0.1427865594625473, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019289702409878372, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019289702409878372, "signal/frontier_coverage_15/centered_abs_mean": 0.10236316695809364, "signal/frontier_coverage_15/group_std_mean": 0.1350301742553711, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001832300634123385, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001832300634123385, "signal/frontier_coverage_20/centered_abs_mean": 0.09971490427851677, "signal/frontier_coverage_20/group_std_mean": 0.1307316705584526, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001784896687604487, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001784896687604487, "signal/frontier_coverage_25/centered_abs_mean": 0.10168659240007401, "signal/frontier_coverage_25/group_std_mean": 0.13174740523099898, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018201899249106646, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018201899249106646, "signal/frontier_coverage_5/centered_abs_mean": 0.11220043301582336, "signal/frontier_coverage_5/group_std_mean": 0.1486586645245552, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002008387632668018, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002008387632668018, "signal/frontier_ece_reward/centered_abs_mean": 0.06663836631923914, "signal/frontier_ece_reward/group_std_mean": 0.08546235710382462, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008329795789904892, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008329795789904892, "step": 125 }, { "calibration/aurc": 0.30066707983754387, "calibration/batch_distribution_entropy": 0.8246395962709563, "calibration/buffer_distribution_entropy": 0.7884855069320794, "calibration/confidence_entropy": 0.4842545938574242, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.071875, "calibration/coverage@15%": 0.17053008050478677, "calibration/coverage@20%": 0.2885958243538204, "calibration/coverage@25%": 0.3971019499469028, "calibration/coverage@30%": 0.5022268091824544, "calibration/coverage@5%": 0.0, "calibration/ece": 0.13698749449646505, "calibration/mean_confidence": 0.7181843915855899, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013281249999999977, "completions/max_length": 3485.4, "completions/max_terminated_length": 3485.4, "completions/mean_length": 880.1, "completions/mean_terminated_length": 892.1025756835937, "completions/min_length": 0.0, "completions/min_terminated_length": 256.2, "epoch": 0.3119961000487494, "grad_norm": 0.0005537553806789219, "learning_rate": 2.349397590361446e-06, "loss": -0.0116, "num_tokens": 294918174.0, "reward": 1.0387332916259766, "reward_std": 0.14156851768493653, "rewards/accuracy_reward": 0.6585069417953491, "rewards/brier_reward": 0.7857657551765442, "rewards/confidence_uniqueness_reward": 0.9319911122322082, "rewards/format_reward": 0.985243046283722, "rewards/frontier_aurc_reward": -0.005019997013732791, "rewards/frontier_coverage_1": 0.005013994639739394, "rewards/frontier_coverage_10": 0.005232905806042254, "rewards/frontier_coverage_15": 0.009756483789533376, "rewards/frontier_coverage_20": 0.019731305353343487, "rewards/frontier_coverage_25": 0.056990716606378555, "rewards/frontier_coverage_5": 0.005129149602726102, "rewards/frontier_ece_reward": 0.0032425791956484317, "signal/accuracy_reward/centered_abs_mean": 0.17151692509651184, "signal/accuracy_reward/group_std_mean": 0.22183212041854858, "signal/accuracy_reward/group_zero_std_frac": 0.38888890147209165, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08575846254825592, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08575846254825592, "signal/advantage_abs_mean": 0.10706333220005035, "signal/advantage_pre_scale_abs_mean": 0.10706333220005035, "signal/advantage_pre_scale_std": 0.1797438532114029, "signal/advantage_std": 0.1797438532114029, "signal/brier_reward/centered_abs_mean": 0.13050457537174226, "signal/brier_reward/group_std_mean": 0.17029784321784974, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016313071921467782, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016313071921467782, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0440841481089592, "signal/confidence_uniqueness_reward/group_std_mean": 0.06221437379717827, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0055105185136199, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0055105185136199, "signal/format_reward/centered_abs_mean": 0.02253689235076308, "signal/format_reward/group_std_mean": 0.036004848405718805, "signal/format_reward/group_zero_std_frac": 0.8694444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01126844617538154, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01126844617538154, "signal/frontier_aurc_reward/centered_abs_mean": 0.006880612950772047, "signal/frontier_aurc_reward/group_std_mean": 0.012929960340261459, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00012316296924836935, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00012316296924836935, "signal/frontier_coverage_1/centered_abs_mean": 0.09724105149507523, "signal/frontier_coverage_1/group_std_mean": 0.13408505618572236, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001740614790469408, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001740614790469408, "signal/frontier_coverage_10/centered_abs_mean": 0.09663455486297608, "signal/frontier_coverage_10/group_std_mean": 0.1333113506436348, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001729758526198566, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001729758526198566, "signal/frontier_coverage_15/centered_abs_mean": 0.0839831992983818, "signal/frontier_coverage_15/group_std_mean": 0.11663785576820374, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015032992465421557, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015032992465421557, "signal/frontier_coverage_20/centered_abs_mean": 0.062086595594882964, "signal/frontier_coverage_20/group_std_mean": 0.08598276525735855, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011113500688225031, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011113500688225031, "signal/frontier_coverage_25/centered_abs_mean": 0.06557924449443817, "signal/frontier_coverage_25/group_std_mean": 0.08785432279109955, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001173868461046368, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001173868461046368, "signal/frontier_coverage_5/centered_abs_mean": 0.09715530872344971, "signal/frontier_coverage_5/group_std_mean": 0.13397427797317504, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017390799708664416, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017390799708664416, "signal/frontier_ece_reward/centered_abs_mean": 0.007921741157770158, "signal/frontier_ece_reward/group_std_mean": 0.010610108450055122, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009902176447212697, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009902176447212697, "step": 130 }, { "calibration/aurc": 0.2678442784741456, "calibration/batch_distribution_entropy": 0.7911120701942849, "calibration/buffer_distribution_entropy": 0.8070088429755357, "calibration/confidence_entropy": 0.4452843908823473, "calibration/coverage@0%": 0.011491141732283465, "calibration/coverage@1%": 0.011491141732283465, "calibration/coverage@10%": 0.2167979002624672, "calibration/coverage@15%": 0.2167979002624672, "calibration/coverage@20%": 0.2797900262467191, "calibration/coverage@25%": 0.5111410415803288, "calibration/coverage@30%": 0.58236026212184, "calibration/coverage@5%": 0.041699475065616796, "calibration/ece": 0.16236960911891057, "calibration/mean_confidence": 0.7544395301047242, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008506944444444442, "completions/max_length": 3544.8, "completions/max_terminated_length": 3544.8, "completions/mean_length": 838.1894897460937, "completions/mean_terminated_length": 845.3768188476563, "completions/min_length": 0.0, "completions/min_terminated_length": 240.4, "epoch": 0.32399595005062437, "grad_norm": 0.0006604056106880307, "learning_rate": 2.1987951807228917e-06, "loss": -0.0072, "num_tokens": 307667141.0, "reward": 1.0535424947738647, "reward_std": 0.12735026627779006, "rewards/accuracy_reward": 0.6780381917953491, "rewards/brier_reward": 0.7968827724456787, "rewards/confidence_uniqueness_reward": 0.9389971017837524, "rewards/format_reward": 0.9914930462837219, "rewards/frontier_aurc_reward": -0.00391119560226798, "rewards/frontier_coverage_1": 0.003469866211526096, "rewards/frontier_coverage_10": 0.003514829161576927, "rewards/frontier_coverage_15": 0.0063144458457827565, "rewards/frontier_coverage_20": 0.016259027272462846, "rewards/frontier_coverage_25": 0.04592671990394592, "rewards/frontier_coverage_5": 0.003469866211526096, "rewards/frontier_ece_reward": 0.0035883668344467877, "signal/accuracy_reward/centered_abs_mean": 0.15276149958372115, "signal/accuracy_reward/group_std_mean": 0.20271962285041809, "signal/accuracy_reward/group_zero_std_frac": 0.42500001192092896, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07638074979186057, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07638074979186057, "signal/advantage_abs_mean": 0.09290645867586136, "signal/advantage_pre_scale_abs_mean": 0.09290645867586136, "signal/advantage_pre_scale_std": 0.16355342268943787, "signal/advantage_std": 0.16355342268943787, "signal/brier_reward/centered_abs_mean": 0.1277672603726387, "signal/brier_reward/group_std_mean": 0.1687987834215164, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015970907546579836, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015970907546579836, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.037500524520874025, "signal/confidence_uniqueness_reward/group_std_mean": 0.057621393352746964, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004687565565109253, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004687565565109253, "signal/format_reward/centered_abs_mean": 0.01510416679084301, "signal/format_reward/group_std_mean": 0.03117924928665161, "signal/format_reward/group_zero_std_frac": 0.8611111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007552083395421505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007552083395421505, "signal/frontier_aurc_reward/centered_abs_mean": 0.0054148219525814055, "signal/frontier_aurc_reward/group_std_mean": 0.009956344775855542, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.69253116636537e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.69253116636537e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.10388213694095612, "signal/frontier_coverage_1/group_std_mean": 0.1445908635854721, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018594901077449322, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018594901077449322, "signal/frontier_coverage_10/centered_abs_mean": 0.10351476073265076, "signal/frontier_coverage_10/group_std_mean": 0.14410740435123442, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018529141088947654, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018529141088947654, "signal/frontier_coverage_15/centered_abs_mean": 0.09183897525072098, "signal/frontier_coverage_15/group_std_mean": 0.12883895337581636, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016439176397398114, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016439176397398114, "signal/frontier_coverage_20/centered_abs_mean": 0.054963266104459764, "signal/frontier_coverage_20/group_std_mean": 0.07745110541582108, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009838424040935934, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009838424040935934, "signal/frontier_coverage_25/centered_abs_mean": 0.05390466451644897, "signal/frontier_coverage_25/group_std_mean": 0.07155173420906066, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009648935054428875, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009648935054428875, "signal/frontier_coverage_5/centered_abs_mean": 0.10388213694095612, "signal/frontier_coverage_5/group_std_mean": 0.1445908635854721, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018594901077449322, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018594901077449322, "signal/frontier_ece_reward/centered_abs_mean": 0.006833387818187475, "signal/frontier_ece_reward/group_std_mean": 0.009374895878136159, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008541734772734344, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008541734772734344, "step": 135 }, { "calibration/aurc": 0.19513551200965937, "calibration/batch_distribution_entropy": 0.8311802037571339, "calibration/buffer_distribution_entropy": 0.8109006063782959, "calibration/confidence_entropy": 0.46980175720261697, "calibration/coverage@0%": 0.006303574312695471, "calibration/coverage@1%": 0.006303574312695471, "calibration/coverage@10%": 0.15071783948397832, "calibration/coverage@15%": 0.3612107233516245, "calibration/coverage@20%": 0.5503501102900245, "calibration/coverage@25%": 0.8174175607275824, "calibration/coverage@30%": 0.9399248776958611, "calibration/coverage@5%": 0.006303574312695471, "calibration/ece": 0.09047163423754609, "calibration/mean_confidence": 0.7242396246544298, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011545138888888884, "completions/max_length": 3611.6, "completions/max_terminated_length": 3611.6, "completions/mean_length": 820.3759643554688, "completions/mean_terminated_length": 829.886279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 269.6, "epoch": 0.33599580005249935, "grad_norm": 0.00045778934145346284, "learning_rate": 2.0481927710843377e-06, "loss": -0.0093, "num_tokens": 320222096.0, "reward": 1.0436462879180908, "reward_std": 0.13221458494663238, "rewards/accuracy_reward": 0.6639757037162781, "rewards/brier_reward": 0.7856175661087036, "rewards/confidence_uniqueness_reward": 0.9376022934913635, "rewards/format_reward": 0.9881944537162781, "rewards/frontier_aurc_reward": -0.004951791558414698, "rewards/frontier_coverage_1": 0.009559250064194202, "rewards/frontier_coverage_10": 0.009636179637163877, "rewards/frontier_coverage_15": 0.010256279539316893, "rewards/frontier_coverage_20": 0.019052751082926988, "rewards/frontier_coverage_25": 0.04648147448897362, "rewards/frontier_coverage_5": 0.009559250064194202, "rewards/frontier_ece_reward": 0.0030080639291554688, "signal/accuracy_reward/centered_abs_mean": 0.1520562082529068, "signal/accuracy_reward/group_std_mean": 0.20364203155040742, "signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0760281041264534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0760281041264534, "signal/advantage_abs_mean": 0.09611336141824722, "signal/advantage_pre_scale_abs_mean": 0.09611336141824722, "signal/advantage_pre_scale_std": 0.1688544750213623, "signal/advantage_std": 0.1688544750213623, "signal/brier_reward/centered_abs_mean": 0.13363418728113174, "signal/brier_reward/group_std_mean": 0.17583012878894805, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016704273410141468, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016704273410141468, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03758625835180283, "signal/confidence_uniqueness_reward/group_std_mean": 0.05853767320513725, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004698282293975353, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004698282293975353, "signal/format_reward/centered_abs_mean": 0.017816840298473836, "signal/format_reward/group_std_mean": 0.03516379445791244, "signal/format_reward/group_zero_std_frac": 0.8472222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008908420149236918, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008908420149236918, "signal/frontier_aurc_reward/centered_abs_mean": 0.0064054221846163275, "signal/frontier_aurc_reward/group_std_mean": 0.01180559191852808, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00011465705611044542, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00011465705611044542, "signal/frontier_coverage_1/centered_abs_mean": 0.10133122354745865, "signal/frontier_coverage_1/group_std_mean": 0.1425911009311676, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018138288287445903, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018138288287445903, "signal/frontier_coverage_10/centered_abs_mean": 0.1010772556066513, "signal/frontier_coverage_10/group_std_mean": 0.14227744638919831, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018092827638611197, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018092827638611197, "signal/frontier_coverage_15/centered_abs_mean": 0.09595019370317459, "signal/frontier_coverage_15/group_std_mean": 0.13558797985315324, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017175084445625543, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017175084445625543, "signal/frontier_coverage_20/centered_abs_mean": 0.060691657662391665, "signal/frontier_coverage_20/group_std_mean": 0.08569863438606262, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010863807052373885, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010863807052373885, "signal/frontier_coverage_25/centered_abs_mean": 0.05632963702082634, "signal/frontier_coverage_25/group_std_mean": 0.0726559266448021, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010083004366606475, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010083004366606475, "signal/frontier_coverage_5/centered_abs_mean": 0.10133122354745865, "signal/frontier_coverage_5/group_std_mean": 0.1425911009311676, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018138288287445903, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018138288287445903, "signal/frontier_ece_reward/centered_abs_mean": 0.007126413751393557, "signal/frontier_ece_reward/group_std_mean": 0.009845777973532676, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008908017189241946, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008908017189241946, "step": 140 }, { "calibration/aurc": 0.23844121848045688, "calibration/batch_distribution_entropy": 0.8294230120411807, "calibration/buffer_distribution_entropy": 0.8332834369273272, "calibration/confidence_entropy": 0.44966823648576637, "calibration/coverage@0%": 0.02151823058491345, "calibration/coverage@1%": 0.02151823058491345, "calibration/coverage@10%": 0.09493209050163852, "calibration/coverage@15%": 0.27207408387812093, "calibration/coverage@20%": 0.39152462093675044, "calibration/coverage@25%": 0.5437544963331418, "calibration/coverage@30%": 0.7778851174934724, "calibration/coverage@5%": 0.026767574416934448, "calibration/ece": 0.12294854977726558, "calibration/mean_confidence": 0.7225680540206456, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01137152777777779, "completions/max_length": 3828.4, "completions/max_terminated_length": 3828.4, "completions/mean_length": 812.3688354492188, "completions/mean_terminated_length": 821.8790405273437, "completions/min_length": 0.0, "completions/min_terminated_length": 276.6, "epoch": 0.34799565005437433, "grad_norm": 0.00042106854380108416, "learning_rate": 1.8975903614457832e-06, "loss": -0.0099, "num_tokens": 332645193.0, "reward": 1.0644186973571776, "reward_std": 0.1273445561528206, "rewards/accuracy_reward": 0.6989583373069763, "rewards/brier_reward": 0.8076271176338196, "rewards/confidence_uniqueness_reward": 0.9371534705162048, "rewards/format_reward": 0.9884548664093018, "rewards/frontier_aurc_reward": -0.0034654760267585514, "rewards/frontier_coverage_1": 0.010090233152732253, "rewards/frontier_coverage_10": 0.010090233152732253, "rewards/frontier_coverage_15": 0.010443723807111383, "rewards/frontier_coverage_20": 0.020869026891887188, "rewards/frontier_coverage_25": 0.06478311643004417, "rewards/frontier_coverage_5": 0.010090233152732253, "rewards/frontier_ece_reward": 0.003317009983584285, "signal/accuracy_reward/centered_abs_mean": 0.14793836772441865, "signal/accuracy_reward/group_std_mean": 0.1978321135044098, "signal/accuracy_reward/group_zero_std_frac": 0.425, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07396918386220933, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07396918386220933, "signal/advantage_abs_mean": 0.09218663424253463, "signal/advantage_pre_scale_abs_mean": 0.09218663424253463, "signal/advantage_pre_scale_std": 0.16679736375808715, "signal/advantage_std": 0.16679736375808715, "signal/brier_reward/centered_abs_mean": 0.12936757355928422, "signal/brier_reward/group_std_mean": 0.17085058093070984, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016170946694910527, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016170946694910527, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.039029645174741744, "signal/confidence_uniqueness_reward/group_std_mean": 0.06023879200220108, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004878705646842718, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004878705646842718, "signal/format_reward/centered_abs_mean": 0.018603515345603228, "signal/format_reward/group_std_mean": 0.03493107426911592, "signal/format_reward/group_zero_std_frac": 0.8583333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009301757672801614, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009301757672801614, "signal/frontier_aurc_reward/centered_abs_mean": 0.004749262239784002, "signal/frontier_aurc_reward/group_std_mean": 0.008506011310964822, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.501178963342682e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.501178963342682e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.10415904968976974, "signal/frontier_coverage_1/group_std_mean": 0.1452164024114609, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018644470255821943, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018644470255821943, "signal/frontier_coverage_10/centered_abs_mean": 0.10415904968976974, "signal/frontier_coverage_10/group_std_mean": 0.1452164024114609, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018644470255821943, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018644470255821943, "signal/frontier_coverage_15/centered_abs_mean": 0.10013787895441055, "signal/frontier_coverage_15/group_std_mean": 0.14012934863567353, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017924679443240166, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017924679443240166, "signal/frontier_coverage_20/centered_abs_mean": 0.05490083321928978, "signal/frontier_coverage_20/group_std_mean": 0.0765575885772705, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009827248635701836, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009827248635701836, "signal/frontier_coverage_25/centered_abs_mean": 0.06228313967585564, "signal/frontier_coverage_25/group_std_mean": 0.07965980023145676, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001114868139848113, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001114868139848113, "signal/frontier_coverage_5/centered_abs_mean": 0.10415904968976974, "signal/frontier_coverage_5/group_std_mean": 0.1452164024114609, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018644470255821943, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018644470255821943, "signal/frontier_ece_reward/centered_abs_mean": 0.006574434693902731, "signal/frontier_ece_reward/group_std_mean": 0.00883613433688879, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008218043367378414, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008218043367378414, "step": 145 }, { "calibration/aurc": 0.213075250859452, "calibration/batch_distribution_entropy": 0.8233432774907394, "calibration/buffer_distribution_entropy": 0.8366827818336875, "calibration/confidence_entropy": 0.4392960218291321, "calibration/coverage@0%": 0.019067250061658745, "calibration/coverage@1%": 0.019067250061658745, "calibration/coverage@10%": 0.3197417803010817, "calibration/coverage@15%": 0.4912859673812978, "calibration/coverage@20%": 0.5355746522716303, "calibration/coverage@25%": 0.58310620015022, "calibration/coverage@30%": 0.6621681636503451, "calibration/coverage@5%": 0.17917223693829917, "calibration/ece": 0.14121752496570036, "calibration/mean_confidence": 0.7174865376212494, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012934027777777768, "completions/max_length": 3621.2, "completions/max_terminated_length": 3621.2, "completions/mean_length": 881.6501831054687, "completions/mean_terminated_length": 893.253125, "completions/min_length": 0.0, "completions/min_terminated_length": 270.2, "epoch": 0.3599955000562493, "grad_norm": 0.0004654189106076956, "learning_rate": 1.7469879518072292e-06, "loss": -0.0109, "num_tokens": 345912139.0, "reward": 1.0574634552001954, "reward_std": 0.13282708525657655, "rewards/accuracy_reward": 0.6893229246139526, "rewards/brier_reward": 0.798139750957489, "rewards/confidence_uniqueness_reward": 0.9334550976753235, "rewards/format_reward": 0.9870659708976746, "rewards/frontier_aurc_reward": -0.004001729190349579, "rewards/frontier_coverage_1": 0.010269207740202546, "rewards/frontier_coverage_10": 0.010370114585384727, "rewards/frontier_coverage_15": 0.012277117744088174, "rewards/frontier_coverage_20": 0.02485618032515049, "rewards/frontier_coverage_25": 0.07025006264448166, "rewards/frontier_coverage_5": 0.010269207740202546, "rewards/frontier_ece_reward": 0.0033263738732784986, "signal/accuracy_reward/centered_abs_mean": 0.1543891042470932, "signal/accuracy_reward/group_std_mean": 0.2028920382261276, "signal/accuracy_reward/group_zero_std_frac": 0.41666666865348817, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0771945521235466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0771945521235466, "signal/advantage_abs_mean": 0.09776623845100403, "signal/advantage_pre_scale_abs_mean": 0.09776623845100403, "signal/advantage_pre_scale_std": 0.17359991371631622, "signal/advantage_std": 0.17359991371631622, "signal/brier_reward/centered_abs_mean": 0.1349114805459976, "signal/brier_reward/group_std_mean": 0.17627350091934205, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0168639350682497, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0168639350682497, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.039914284646511075, "signal/confidence_uniqueness_reward/group_std_mean": 0.061895917356014254, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004989285580813884, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004989285580813884, "signal/format_reward/centered_abs_mean": 0.02108832448720932, "signal/format_reward/group_std_mean": 0.03968926072120667, "signal/format_reward/group_zero_std_frac": 0.8333333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01054416224360466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01054416224360466, "signal/frontier_aurc_reward/centered_abs_mean": 0.005396629869937897, "signal/frontier_aurc_reward/group_std_mean": 0.009891701303422451, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.659966890467331e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.659966890467331e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1010685533285141, "signal/frontier_coverage_1/group_std_mean": 0.1450372576713562, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018091270700097083, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018091270700097083, "signal/frontier_coverage_10/centered_abs_mean": 0.10048999190330506, "signal/frontier_coverage_10/group_std_mean": 0.14426515400409698, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017987707862630487, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017987707862630487, "signal/frontier_coverage_15/centered_abs_mean": 0.0918548583984375, "signal/frontier_coverage_15/group_std_mean": 0.13288411647081375, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016442019026726485, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016442019026726485, "signal/frontier_coverage_20/centered_abs_mean": 0.053203088045120236, "signal/frontier_coverage_20/group_std_mean": 0.07647181898355485, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009523351793177426, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009523351793177426, "signal/frontier_coverage_25/centered_abs_mean": 0.06769980266690254, "signal/frontier_coverage_25/group_std_mean": 0.0865550771355629, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012118263402953744, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012118263402953744, "signal/frontier_coverage_5/centered_abs_mean": 0.1010685533285141, "signal/frontier_coverage_5/group_std_mean": 0.1450372576713562, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018091270700097083, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018091270700097083, "signal/frontier_ece_reward/centered_abs_mean": 0.006280233804136515, "signal/frontier_ece_reward/group_std_mean": 0.008365536015480757, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007850292255170644, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007850292255170644, "step": 150 }, { "epoch": 0.3599955000562493, "eval_calibration/aurc": 0.22214487372449288, "eval_calibration/batch_distribution_entropy": 0.7755822830854329, "eval_calibration/buffer_distribution_entropy": 0.8471814964949228, "eval_calibration/confidence_entropy": 0.4570869870657699, "eval_calibration/coverage@0%": 0.11139112903225806, "eval_calibration/coverage@1%": 0.11139112903225806, "eval_calibration/coverage@10%": 0.20278897849462366, "eval_calibration/coverage@15%": 0.3587029569892473, "eval_calibration/coverage@20%": 0.683635752688172, "eval_calibration/coverage@25%": 0.7795698924731184, "eval_calibration/coverage@30%": 0.9895833333333334, "eval_calibration/coverage@5%": 0.11139112903225806, "eval_calibration/ece": 0.19708409512610767, "eval_calibration/mean_confidence": 0.7251063188085235, "eval_completions/clipped_ratio": 0.006944444444444438, "eval_completions/max_length": 2807.0, "eval_completions/max_terminated_length": 2807.0, "eval_completions/mean_length": 860.0338643391927, "eval_completions/mean_terminated_length": 866.151621500651, "eval_completions/min_length": 124.66666666666667, "eval_completions/min_terminated_length": 340.0, "eval_loss": 0.0, "eval_num_tokens": 345912139.0, "eval_reward": 1.0506399472554524, "eval_reward_std": 0.25993067771196365, "eval_rewards/accuracy_reward": 0.6848958333333334, "eval_rewards/brier_reward": 0.7940681974093119, "eval_rewards/confidence_uniqueness_reward": 0.8835298220316569, "eval_rewards/format_reward": 0.9921875, "eval_rewards/frontier_aurc_reward": -0.00451858372737964, "eval_rewards/frontier_coverage_1": 0.007746024794566135, "eval_rewards/frontier_coverage_10": 0.0080074449069798, "eval_rewards/frontier_coverage_15": 0.010937723660996804, "eval_rewards/frontier_coverage_20": 0.02070824522525072, "eval_rewards/frontier_coverage_25": 0.06115776486694813, "eval_rewards/frontier_coverage_5": 0.007746024794566135, "eval_rewards/frontier_ece_reward": 0.0031805916223675013, "eval_runtime": 197.7756, "eval_samples_per_second": 5.056, "eval_signal/accuracy_reward/centered_abs_mean": 0.4182400206724803, "eval_signal/accuracy_reward/group_std_mean": 0.4636932412783305, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20912001033624014, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20912001033624014, "eval_signal/advantage_abs_mean": 0.22610543916622797, "eval_signal/advantage_pre_scale_abs_mean": 0.22610543916622797, "eval_signal/advantage_pre_scale_std": 0.2583668604493141, "eval_signal/advantage_std": 0.2583668604493141, "eval_signal/brier_reward/centered_abs_mean": 0.21914813170830408, "eval_signal/brier_reward/group_std_mean": 0.27482346693674725, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02739351646353801, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02739351646353801, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.055060590306917824, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08307173289358616, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006882573788364728, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006882573788364728, "eval_signal/format_reward/centered_abs_mean": 0.015028211598594984, "eval_signal/format_reward/group_std_mean": 0.041204764818151794, "eval_signal/format_reward/group_zero_std_frac": 0.7777778009573618, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.007514105799297492, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.007514105799297492, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.007855243321197728, "eval_signal/frontier_aurc_reward/group_std_mean": 0.01771801950720449, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0001406088534470958, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0001406088534470958, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.1449227419992288, "eval_signal/frontier_coverage_1/group_std_mean": 0.24502811332543692, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002594117036399742, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002594117036399742, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.14424370601773262, "eval_signal/frontier_coverage_10/group_std_mean": 0.24401270101467767, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002581962267868221, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002581962267868221, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.1283226286371549, "eval_signal/frontier_coverage_15/group_std_mean": 0.2211132695277532, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002296974960093697, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002296974960093697, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.0656218013415734, "eval_signal/frontier_coverage_20/group_std_mean": 0.11319748063882192, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011746301994814228, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011746301994814228, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.09421608969569206, "eval_signal/frontier_coverage_25/group_std_mean": 0.11541344473759334, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016864680025416117, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016864680025416117, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.1449227419992288, "eval_signal/frontier_coverage_5/group_std_mean": 0.24502811332543692, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002594117036399742, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002594117036399742, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.006373878801241517, "eval_signal/frontier_ece_reward/group_std_mean": 0.00929814165768524, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007967348501551896, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007967348501551896, "eval_steps_per_second": 0.03, "step": 150 }, { "calibration/aurc": 0.1394606443752609, "calibration/batch_distribution_entropy": 0.7936142557094252, "calibration/buffer_distribution_entropy": 0.845419702343172, "calibration/confidence_entropy": 0.41882709613499725, "calibration/coverage@0%": 0.017655267348676385, "calibration/coverage@1%": 0.017655267348676385, "calibration/coverage@10%": 0.5253642503070282, "calibration/coverage@15%": 0.6576306287853386, "calibration/coverage@20%": 0.7686752423003841, "calibration/coverage@25%": 0.8230803931486064, "calibration/coverage@30%": 0.9062827225130891, "calibration/coverage@5%": 0.21733968716937588, "calibration/ece": 0.1140321937468661, "calibration/mean_confidence": 0.7414189853264833, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011979166666666674, "completions/max_length": 3721.4, "completions/max_terminated_length": 3721.4, "completions/mean_length": 836.39609375, "completions/mean_terminated_length": 846.6472778320312, "completions/min_length": 0.0, "completions/min_terminated_length": 274.2, "epoch": 0.3719953500581243, "grad_norm": 0.00045802563545294106, "learning_rate": 1.5963855421686747e-06, "loss": -0.0107, "num_tokens": 358655134.0, "reward": 1.089371681213379, "reward_std": 0.13748800307512282, "rewards/accuracy_reward": 0.7427951455116272, "rewards/brier_reward": 0.8266907215118409, "rewards/confidence_uniqueness_reward": 0.9327721953392029, "rewards/format_reward": 0.9880208253860474, "rewards/frontier_aurc_reward": -0.0032799826469272373, "rewards/frontier_coverage_1": 0.007008756510913372, "rewards/frontier_coverage_10": 0.007702544890344143, "rewards/frontier_coverage_15": 0.011821538442745805, "rewards/frontier_coverage_20": 0.04276132583618164, "rewards/frontier_coverage_25": 0.1319062441587448, "rewards/frontier_coverage_5": 0.007008756510913372, "rewards/frontier_ece_reward": 0.0029005682095885276, "signal/accuracy_reward/centered_abs_mean": 0.16427408754825593, "signal/accuracy_reward/group_std_mean": 0.21161983013153077, "signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08213704377412796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08213704377412796, "signal/advantage_abs_mean": 0.1013274610042572, "signal/advantage_pre_scale_abs_mean": 0.1013274610042572, "signal/advantage_pre_scale_std": 0.1804036021232605, "signal/advantage_std": 0.1804036021232605, "signal/brier_reward/centered_abs_mean": 0.13167781829833985, "signal/brier_reward/group_std_mean": 0.17299252152442932, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016459727287292482, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016459727287292482, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04091334193944931, "signal/confidence_uniqueness_reward/group_std_mean": 0.06404241994023323, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005114167742431164, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005114167742431164, "signal/format_reward/centered_abs_mean": 0.02075737863779068, "signal/format_reward/group_std_mean": 0.04021854251623154, "signal/format_reward/group_zero_std_frac": 0.8305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01037868931889534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01037868931889534, "signal/frontier_aurc_reward/centered_abs_mean": 0.004709955211728812, "signal/frontier_aurc_reward/group_std_mean": 0.008472612965852023, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.430819725617767e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.430819725617767e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.09957832992076873, "signal/frontier_coverage_1/group_std_mean": 0.14525699019432067, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017824520589783787, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017824520589783787, "signal/frontier_coverage_10/centered_abs_mean": 0.09750574976205825, "signal/frontier_coverage_10/group_std_mean": 0.1425594985485077, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017453528707847, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017453528707847, "signal/frontier_coverage_15/centered_abs_mean": 0.07561995238065719, "signal/frontier_coverage_15/group_std_mean": 0.11277424991130829, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013535971054807305, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013535971054807305, "signal/frontier_coverage_20/centered_abs_mean": 0.04978926405310631, "signal/frontier_coverage_20/group_std_mean": 0.06711633205413818, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008912277640774846, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008912277640774846, "signal/frontier_coverage_25/centered_abs_mean": 0.09789690375328064, "signal/frontier_coverage_25/group_std_mean": 0.12379591763019562, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017523544374853373, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017523544374853373, "signal/frontier_coverage_5/centered_abs_mean": 0.09957832992076873, "signal/frontier_coverage_5/group_std_mean": 0.14525699019432067, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017824520589783787, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017824520589783787, "signal/frontier_ece_reward/centered_abs_mean": 0.005051241349428892, "signal/frontier_ece_reward/group_std_mean": 0.006803230196237564, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006314051686786115, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006314051686786115, "step": 155 }, { "calibration/aurc": 0.18785021343328706, "calibration/batch_distribution_entropy": 0.6710961452816249, "calibration/buffer_distribution_entropy": 0.8140405972224055, "calibration/confidence_entropy": 0.35810035374049687, "calibration/coverage@0%": 0.023060960861917328, "calibration/coverage@1%": 0.023060960861917328, "calibration/coverage@10%": 0.42201104881266494, "calibration/coverage@15%": 0.5009179859278804, "calibration/coverage@20%": 0.721875, "calibration/coverage@25%": 0.790625, "calibration/coverage@30%": 0.8, "calibration/coverage@5%": 0.1405123131046614, "calibration/ece": 0.12457323190466667, "calibration/mean_confidence": 0.8050869567093308, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01362847222222221, "completions/max_length": 3826.4, "completions/max_terminated_length": 3826.4, "completions/mean_length": 839.6751708984375, "completions/mean_terminated_length": 851.2668823242187, "completions/min_length": 0.0, "completions/min_terminated_length": 271.2, "epoch": 0.38399520005999926, "grad_norm": 0.00044471403816714883, "learning_rate": 1.4457831325301204e-06, "loss": -0.0123, "num_tokens": 371415488.0, "reward": 1.048078441619873, "reward_std": 0.13773128092288972, "rewards/accuracy_reward": 0.6730902791023254, "rewards/brier_reward": 0.7841872692108154, "rewards/confidence_uniqueness_reward": 0.9249788880348205, "rewards/format_reward": 0.9861979126930237, "rewards/frontier_aurc_reward": -0.00579830389469862, "rewards/frontier_coverage_1": 0.021006893925368787, "rewards/frontier_coverage_10": 0.021663734689354897, "rewards/frontier_coverage_15": 0.0223736809566617, "rewards/frontier_coverage_20": 0.03696827031672001, "rewards/frontier_coverage_25": 0.12879179567098617, "rewards/frontier_coverage_5": 0.021024198178201912, "rewards/frontier_ece_reward": 0.003077392978593707, "signal/accuracy_reward/centered_abs_mean": 0.15486110746860504, "signal/accuracy_reward/group_std_mean": 0.2035621464252472, "signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07743055373430252, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07743055373430252, "signal/advantage_abs_mean": 0.10096549987792969, "signal/advantage_pre_scale_abs_mean": 0.10096549987792969, "signal/advantage_pre_scale_std": 0.17911962866783143, "signal/advantage_std": 0.17911962866783143, "signal/brier_reward/centered_abs_mean": 0.14338811337947846, "signal/brier_reward/group_std_mean": 0.18627047538757324, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017923514172434808, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017923514172434808, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04587777331471443, "signal/confidence_uniqueness_reward/group_std_mean": 0.07106368690729141, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005734721664339304, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005734721664339304, "signal/format_reward/centered_abs_mean": 0.02301974855363369, "signal/format_reward/group_std_mean": 0.04283802658319473, "signal/format_reward/group_zero_std_frac": 0.825, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011509874276816845, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011509874276816845, "signal/frontier_aurc_reward/centered_abs_mean": 0.007479064725339412, "signal/frontier_aurc_reward/group_std_mean": 0.013051173277199268, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0001338752525043674, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0001338752525043674, "signal/frontier_coverage_1/centered_abs_mean": 0.09618723690509796, "signal/frontier_coverage_1/group_std_mean": 0.13724965155124663, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017217515734955669, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017217515734955669, "signal/frontier_coverage_10/centered_abs_mean": 0.09364039450883865, "signal/frontier_coverage_10/group_std_mean": 0.1338651016354561, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016761629842221737, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016761629842221737, "signal/frontier_coverage_15/centered_abs_mean": 0.0792768731713295, "signal/frontier_coverage_15/group_std_mean": 0.11451553106307984, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001419055974110961, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001419055974110961, "signal/frontier_coverage_20/centered_abs_mean": 0.050817693769931796, "signal/frontier_coverage_20/group_std_mean": 0.06823742240667344, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009096366818994283, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009096366818994283, "signal/frontier_coverage_25/centered_abs_mean": 0.1097530648112297, "signal/frontier_coverage_25/group_std_mean": 0.14021052420139313, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019645798252895474, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019645798252895474, "signal/frontier_coverage_5/centered_abs_mean": 0.09607678651809692, "signal/frontier_coverage_5/group_std_mean": 0.1371078222990036, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017197745153680443, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017197745153680443, "signal/frontier_ece_reward/centered_abs_mean": 0.005614162608981133, "signal/frontier_ece_reward/group_std_mean": 0.0075827624648809435, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007017703261226416, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007017703261226416, "step": 160 }, { "calibration/aurc": 0.21941487360333833, "calibration/batch_distribution_entropy": 0.7714647539506028, "calibration/buffer_distribution_entropy": 0.7886613300284808, "calibration/confidence_entropy": 0.3824705529750695, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.1962694417442022, "calibration/coverage@15%": 0.4322914568187518, "calibration/coverage@20%": 0.5052041737994015, "calibration/coverage@25%": 0.5948411738398793, "calibration/coverage@30%": 0.7651094471992914, "calibration/coverage@5%": 0.1105263157894737, "calibration/ece": 0.13094677855056783, "calibration/mean_confidence": 0.732904433164731, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01909722222222221, "completions/max_length": 3621.4, "completions/max_terminated_length": 3621.4, "completions/mean_length": 862.18828125, "completions/mean_terminated_length": 879.2411743164063, "completions/min_length": 0.0, "completions/min_terminated_length": 278.4, "epoch": 0.39599505006187424, "grad_norm": 0.000504399067722261, "learning_rate": 1.2951807228915664e-06, "loss": -0.0156, "num_tokens": 384486969.0, "reward": 1.0396140456199645, "reward_std": 0.13989297300577164, "rewards/accuracy_reward": 0.6612847208976745, "rewards/brier_reward": 0.7844983577728272, "rewards/confidence_uniqueness_reward": 0.919456148147583, "rewards/format_reward": 0.9807291746139526, "rewards/frontier_aurc_reward": -0.004816135391592979, "rewards/frontier_coverage_1": 0.03149934858083725, "rewards/frontier_coverage_10": 0.03149934858083725, "rewards/frontier_coverage_15": 0.029880692809820177, "rewards/frontier_coverage_20": 0.03761248253285885, "rewards/frontier_coverage_25": 0.1226318396627903, "rewards/frontier_coverage_5": 0.03149934858083725, "rewards/frontier_ece_reward": 0.004833784187212586, "signal/accuracy_reward/centered_abs_mean": 0.15111762285232544, "signal/accuracy_reward/group_std_mean": 0.2020161658525467, "signal/accuracy_reward/group_zero_std_frac": 0.4083333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07555881142616272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07555881142616272, "signal/advantage_abs_mean": 0.10153342485427856, "signal/advantage_pre_scale_abs_mean": 0.10153342485427856, "signal/advantage_pre_scale_std": 0.181088188290596, "signal/advantage_std": 0.181088188290596, "signal/brier_reward/centered_abs_mean": 0.14908890426158905, "signal/brier_reward/group_std_mean": 0.1965240716934204, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01863611303269863, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01863611303269863, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.051202216744422914, "signal/confidence_uniqueness_reward/group_std_mean": 0.07755682170391083, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006400277093052864, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006400277093052864, "signal/format_reward/centered_abs_mean": 0.02900390625, "signal/format_reward/group_std_mean": 0.0507274828851223, "signal/format_reward/group_zero_std_frac": 0.8027777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014501953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014501953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.006489654909819365, "signal/frontier_aurc_reward/group_std_mean": 0.011499019339680672, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0001161648178822361, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0001161648178822361, "signal/frontier_coverage_1/centered_abs_mean": 0.10610861033201217, "signal/frontier_coverage_1/group_std_mean": 0.15249101519584657, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001899344054982066, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001899344054982066, "signal/frontier_coverage_10/centered_abs_mean": 0.10610861033201217, "signal/frontier_coverage_10/group_std_mean": 0.15249101519584657, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001899344054982066, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001899344054982066, "signal/frontier_coverage_15/centered_abs_mean": 0.09463098794221877, "signal/frontier_coverage_15/group_std_mean": 0.13721374273300171, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016938945977017284, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016938945977017284, "signal/frontier_coverage_20/centered_abs_mean": 0.06229153722524643, "signal/frontier_coverage_20/group_std_mean": 0.08806440830230713, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011150184785947205, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011150184785947205, "signal/frontier_coverage_25/centered_abs_mean": 0.09881225526332856, "signal/frontier_coverage_25/group_std_mean": 0.12897567003965377, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017687393119558692, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017687393119558692, "signal/frontier_coverage_5/centered_abs_mean": 0.10610861033201217, "signal/frontier_coverage_5/group_std_mean": 0.15249101519584657, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001899344054982066, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001899344054982066, "signal/frontier_ece_reward/centered_abs_mean": 0.007002682704478502, "signal/frontier_ece_reward/group_std_mean": 0.009100762195885181, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008753353380598128, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008753353380598128, "step": 165 }, { "calibration/aurc": 0.16166878537469914, "calibration/batch_distribution_entropy": 0.7354266691642397, "calibration/buffer_distribution_entropy": 0.7575519463431387, "calibration/confidence_entropy": 0.37874483129131903, "calibration/coverage@0%": 0.02202937983537398, "calibration/coverage@1%": 0.02202937983537398, "calibration/coverage@10%": 0.19157356639320605, "calibration/coverage@15%": 0.6136701427359228, "calibration/coverage@20%": 0.7577182526254914, "calibration/coverage@25%": 0.8434017263336635, "calibration/coverage@30%": 0.9558143483797934, "calibration/coverage@5%": 0.06191477374228745, "calibration/ece": 0.11013321716453353, "calibration/mean_confidence": 0.7809981872862709, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016840277777777746, "completions/max_length": 3700.0, "completions/max_terminated_length": 3700.0, "completions/mean_length": 844.8034912109375, "completions/mean_terminated_length": 859.2811767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 288.2, "epoch": 0.4079949000637492, "grad_norm": 0.0004786611534655094, "learning_rate": 1.1445783132530121e-06, "loss": -0.0159, "num_tokens": 397308289.0, "reward": 1.0684638977050782, "reward_std": 0.1363822802901268, "rewards/accuracy_reward": 0.7136284589767456, "rewards/brier_reward": 0.8065288186073303, "rewards/confidence_uniqueness_reward": 0.9220426321029663, "rewards/format_reward": 0.9829861044883728, "rewards/frontier_aurc_reward": -0.0036202599760144947, "rewards/frontier_coverage_1": 0.012478399742394685, "rewards/frontier_coverage_10": 0.012478399742394685, "rewards/frontier_coverage_15": 0.013967123441398143, "rewards/frontier_coverage_20": 0.030986898019909857, "rewards/frontier_coverage_25": 0.10951882898807526, "rewards/frontier_coverage_5": 0.012478399742394685, "rewards/frontier_ece_reward": 0.005718720983713866, "signal/accuracy_reward/centered_abs_mean": 0.1496690556406975, "signal/accuracy_reward/group_std_mean": 0.20075952112674714, "signal/accuracy_reward/group_zero_std_frac": 0.4194444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07483452782034875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07483452782034875, "signal/advantage_abs_mean": 0.09796086251735688, "signal/advantage_pre_scale_abs_mean": 0.09796086251735688, "signal/advantage_pre_scale_std": 0.17858160734176637, "signal/advantage_std": 0.17858160734176637, "signal/brier_reward/centered_abs_mean": 0.14328038990497588, "signal/brier_reward/group_std_mean": 0.18521734774112703, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017910048738121985, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017910048738121985, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05042630434036255, "signal/confidence_uniqueness_reward/group_std_mean": 0.07379210442304611, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006303288042545319, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006303288042545319, "signal/format_reward/centered_abs_mean": 0.02784288227558136, "signal/format_reward/group_std_mean": 0.04672937579452992, "signal/format_reward/group_zero_std_frac": 0.8277777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01392144113779068, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01392144113779068, "signal/frontier_aurc_reward/centered_abs_mean": 0.005079053156077862, "signal/frontier_aurc_reward/group_std_mean": 0.009023398347198962, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.091504471143707e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.091504471143707e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.10737672746181488, "signal/frontier_coverage_1/group_std_mean": 0.1529840499162674, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019220433663576842, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019220433663576842, "signal/frontier_coverage_10/centered_abs_mean": 0.10737672746181488, "signal/frontier_coverage_10/group_std_mean": 0.1529840499162674, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019220433663576842, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019220433663576842, "signal/frontier_coverage_15/centered_abs_mean": 0.09843996018171311, "signal/frontier_coverage_15/group_std_mean": 0.14119636714458467, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017620753031224012, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017620753031224012, "signal/frontier_coverage_20/centered_abs_mean": 0.06077901348471641, "signal/frontier_coverage_20/group_std_mean": 0.08596007078886032, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010879443609155715, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010879443609155715, "signal/frontier_coverage_25/centered_abs_mean": 0.08554520159959793, "signal/frontier_coverage_25/group_std_mean": 0.10874855667352676, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015312590170651675, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015312590170651675, "signal/frontier_coverage_5/centered_abs_mean": 0.10737672746181488, "signal/frontier_coverage_5/group_std_mean": 0.1529840499162674, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019220433663576842, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019220433663576842, "signal/frontier_ece_reward/centered_abs_mean": 0.007207877747714519, "signal/frontier_ece_reward/group_std_mean": 0.0095659539103508, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009009847184643149, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009009847184643149, "step": 170 }, { "calibration/aurc": 0.12835975236328778, "calibration/batch_distribution_entropy": 0.821059537069926, "calibration/buffer_distribution_entropy": 0.7583996288756972, "calibration/confidence_entropy": 0.4081814972315116, "calibration/coverage@0%": 0.00862533692722372, "calibration/coverage@1%": 0.00862533692722372, "calibration/coverage@10%": 0.33159990384193466, "calibration/coverage@15%": 0.7465159898081857, "calibration/coverage@20%": 0.8805055999149627, "calibration/coverage@25%": 0.9511387978142076, "calibration/coverage@30%": 0.9962666666666667, "calibration/coverage@5%": 0.21060197663971247, "calibration/ece": 0.084675956737041, "calibration/mean_confidence": 0.7221457613282724, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01944444444444444, "completions/max_length": 3881.0, "completions/max_terminated_length": 3881.0, "completions/mean_length": 882.5856811523438, "completions/mean_terminated_length": 900.1067504882812, "completions/min_length": 0.0, "completions/min_terminated_length": 265.2, "epoch": 0.4199947500656242, "grad_norm": 0.0004372715193312615, "learning_rate": 9.93975903614458e-07, "loss": -0.0184, "num_tokens": 410583644.0, "reward": 1.0630595207214355, "reward_std": 0.1402587652206421, "rewards/accuracy_reward": 0.7052951455116272, "rewards/brier_reward": 0.801636004447937, "rewards/confidence_uniqueness_reward": 0.9212079167366027, "rewards/format_reward": 0.9804687619209289, "rewards/frontier_aurc_reward": -0.003202560031786561, "rewards/frontier_coverage_1": 0.009410932660102844, "rewards/frontier_coverage_10": 0.00938644101843238, "rewards/frontier_coverage_15": 0.011940532876178623, "rewards/frontier_coverage_20": 0.04426906034350395, "rewards/frontier_coverage_25": 0.15708873867988588, "rewards/frontier_coverage_5": 0.009410932660102844, "rewards/frontier_ece_reward": 0.0044516745489090685, "signal/accuracy_reward/centered_abs_mean": 0.15396592915058135, "signal/accuracy_reward/group_std_mean": 0.20630868673324584, "signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07698296457529068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07698296457529068, "signal/advantage_abs_mean": 0.10177666842937469, "signal/advantage_pre_scale_abs_mean": 0.10177666842937469, "signal/advantage_pre_scale_std": 0.18463847935199737, "signal/advantage_std": 0.18463847935199737, "signal/brier_reward/centered_abs_mean": 0.1451348751783371, "signal/brier_reward/group_std_mean": 0.1891576647758484, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018141859397292136, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018141859397292136, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05311574935913086, "signal/confidence_uniqueness_reward/group_std_mean": 0.07745091170072556, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006639468669891358, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006639468669891358, "signal/format_reward/centered_abs_mean": 0.03104926161468029, "signal/format_reward/group_std_mean": 0.05074087902903557, "signal/format_reward/group_zero_std_frac": 0.8166666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.015524630807340145, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.015524630807340145, "signal/frontier_aurc_reward/centered_abs_mean": 0.004582889564335346, "signal/frontier_aurc_reward/group_std_mean": 0.008765554707497358, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.203371689887717e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.203371689887717e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.11115374714136124, "signal/frontier_coverage_1/group_std_mean": 0.15887077152729034, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001989651983603835, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001989651983603835, "signal/frontier_coverage_10/centered_abs_mean": 0.11011287569999695, "signal/frontier_coverage_10/group_std_mean": 0.15743386447429658, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001971020409837365, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001971020409837365, "signal/frontier_coverage_15/centered_abs_mean": 0.09279258996248245, "signal/frontier_coverage_15/group_std_mean": 0.13348000943660737, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001660987362265587, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001660987362265587, "signal/frontier_coverage_20/centered_abs_mean": 0.06362280994653702, "signal/frontier_coverage_20/group_std_mean": 0.08751891702413558, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011388482293114067, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011388482293114067, "signal/frontier_coverage_25/centered_abs_mean": 0.10877040922641754, "signal/frontier_coverage_25/group_std_mean": 0.13969840705394745, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019469902152195572, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019469902152195572, "signal/frontier_coverage_5/centered_abs_mean": 0.11115374714136124, "signal/frontier_coverage_5/group_std_mean": 0.15887077152729034, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001989651983603835, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001989651983603835, "signal/frontier_ece_reward/centered_abs_mean": 0.007770705409348011, "signal/frontier_ece_reward/group_std_mean": 0.010677088052034378, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009713381761685014, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009713381761685014, "step": 175 }, { "calibration/aurc": 0.11867554556626558, "calibration/batch_distribution_entropy": 0.7719426589599478, "calibration/buffer_distribution_entropy": 0.7868053149641373, "calibration/confidence_entropy": 0.3982474085000685, "calibration/coverage@0%": 0.04073107049608355, "calibration/coverage@1%": 0.04073107049608355, "calibration/coverage@10%": 0.42235769120170313, "calibration/coverage@15%": 0.7863377057034178, "calibration/coverage@20%": 0.9337981654542539, "calibration/coverage@25%": 0.9724137931034482, "calibration/coverage@30%": 0.9946949602122016, "calibration/coverage@5%": 0.11290462955164378, "calibration/ece": 0.0809440485719725, "calibration/mean_confidence": 0.7680337505150759, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02213541666666665, "completions/max_length": 3965.0, "completions/max_terminated_length": 3965.0, "completions/mean_length": 863.0920166015625, "completions/mean_terminated_length": 882.6232299804688, "completions/min_length": 0.0, "completions/min_terminated_length": 304.4, "epoch": 0.4319946000674992, "grad_norm": 0.00038124839193187654, "learning_rate": 8.433734939759036e-07, "loss": -0.0194, "num_tokens": 423626432.0, "reward": 1.0619913816452027, "reward_std": 0.1390293389558792, "rewards/accuracy_reward": 0.7075520753860474, "rewards/brier_reward": 0.7912116408348083, "rewards/confidence_uniqueness_reward": 0.9133248925209045, "rewards/format_reward": 0.9776909589767456, "rewards/frontier_aurc_reward": -0.004389631748199463, "rewards/frontier_coverage_1": 0.007734634727239609, "rewards/frontier_coverage_10": 0.008618967141956091, "rewards/frontier_coverage_15": 0.021352371852844953, "rewards/frontier_coverage_20": 0.06736778169870376, "rewards/frontier_coverage_25": 0.21702924072742463, "rewards/frontier_coverage_5": 0.007734634727239609, "rewards/frontier_ece_reward": 0.003818414593115449, "signal/accuracy_reward/centered_abs_mean": 0.14320203959941863, "signal/accuracy_reward/group_std_mean": 0.19457052350044252, "signal/accuracy_reward/group_zero_std_frac": 0.4250000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07160101979970931, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07160101979970931, "signal/advantage_abs_mean": 0.09958047866821289, "signal/advantage_pre_scale_abs_mean": 0.09958047866821289, "signal/advantage_pre_scale_std": 0.18535009622573853, "signal/advantage_std": 0.18535009622573853, "signal/brier_reward/centered_abs_mean": 0.14485330879688263, "signal/brier_reward/group_std_mean": 0.19100603461265564, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01810666359961033, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01810666359961033, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05692942887544632, "signal/confidence_uniqueness_reward/group_std_mean": 0.0827468365430832, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00711617860943079, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00711617860943079, "signal/format_reward/centered_abs_mean": 0.03310004323720932, "signal/format_reward/group_std_mean": 0.05456235036253929, "signal/format_reward/group_zero_std_frac": 0.7972222208976746, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01655002161860466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01655002161860466, "signal/frontier_aurc_reward/centered_abs_mean": 0.005390328448265791, "signal/frontier_aurc_reward/group_std_mean": 0.009165607579052448, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.648687555454671e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.648687555454671e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1076874241232872, "signal/frontier_coverage_1/group_std_mean": 0.15496502816677094, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019276048755273224, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019276048755273224, "signal/frontier_coverage_10/centered_abs_mean": 0.104564568400383, "signal/frontier_coverage_10/group_std_mean": 0.15102957487106322, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018717057770118116, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018717057770118116, "signal/frontier_coverage_15/centered_abs_mean": 0.07957575172185898, "signal/frontier_coverage_15/group_std_mean": 0.11534264981746674, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014244059333577753, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014244059333577753, "signal/frontier_coverage_20/centered_abs_mean": 0.07137952968478203, "signal/frontier_coverage_20/group_std_mean": 0.09536249935626984, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012776935007423162, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012776935007423162, "signal/frontier_coverage_25/centered_abs_mean": 0.14418553411960602, "signal/frontier_coverage_25/group_std_mean": 0.18653603494167328, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025809210259467364, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025809210259467364, "signal/frontier_coverage_5/centered_abs_mean": 0.1076874241232872, "signal/frontier_coverage_5/group_std_mean": 0.15496502816677094, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019276048755273224, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019276048755273224, "signal/frontier_ece_reward/centered_abs_mean": 0.008868717961013317, "signal/frontier_ece_reward/group_std_mean": 0.012341622821986675, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011085897451266647, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011085897451266647, "step": 180 }, { "calibration/aurc": 0.17966311621561074, "calibration/batch_distribution_entropy": 0.7566981674049396, "calibration/buffer_distribution_entropy": 0.7915198321155537, "calibration/confidence_entropy": 0.39772641085626637, "calibration/coverage@0%": 0.013611198447090345, "calibration/coverage@1%": 0.013611198447090345, "calibration/coverage@10%": 0.085222752313644, "calibration/coverage@15%": 0.34522302571323116, "calibration/coverage@20%": 0.8419730062027713, "calibration/coverage@25%": 0.9279373368146213, "calibration/coverage@30%": 0.9514360313315926, "calibration/coverage@5%": 0.035600727242901864, "calibration/ece": 0.12424600098394474, "calibration/mean_confidence": 0.7696269697210354, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01918402777777777, "completions/max_length": 3683.6, "completions/max_terminated_length": 3683.6, "completions/mean_length": 860.390283203125, "completions/mean_terminated_length": 877.1977905273437, "completions/min_length": 0.0, "completions/min_terminated_length": 264.0, "epoch": 0.44399445006937416, "grad_norm": 0.00042491775820963085, "learning_rate": 6.927710843373495e-07, "loss": -0.0157, "num_tokens": 436628176.0, "reward": 1.0528750896453858, "reward_std": 0.14500550627708436, "rewards/accuracy_reward": 0.6854166746139526, "rewards/brier_reward": 0.7892068386077881, "rewards/confidence_uniqueness_reward": 0.9187054872512818, "rewards/format_reward": 0.9807291626930237, "rewards/frontier_aurc_reward": -0.0036725443322211506, "rewards/frontier_coverage_1": 0.01439770613797009, "rewards/frontier_coverage_10": 0.016149942949414253, "rewards/frontier_coverage_15": 0.026541436091065407, "rewards/frontier_coverage_20": 0.07000061422586441, "rewards/frontier_coverage_25": 0.18287851214408873, "rewards/frontier_coverage_5": 0.01439770613797009, "rewards/frontier_ece_reward": 0.004581967741250992, "signal/accuracy_reward/centered_abs_mean": 0.15631510615348815, "signal/accuracy_reward/group_std_mean": 0.2074061244726181, "signal/accuracy_reward/group_zero_std_frac": 0.4000000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07815755307674407, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07815755307674407, "signal/advantage_abs_mean": 0.10565940588712693, "signal/advantage_pre_scale_abs_mean": 0.10565940588712693, "signal/advantage_pre_scale_std": 0.18545168936252593, "signal/advantage_std": 0.18545168936252593, "signal/brier_reward/centered_abs_mean": 0.14972881078720093, "signal/brier_reward/group_std_mean": 0.1930097758769989, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018716101348400117, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018716101348400117, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05400483831763268, "signal/confidence_uniqueness_reward/group_std_mean": 0.0815995305776596, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006750604789704085, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006750604789704085, "signal/format_reward/centered_abs_mean": 0.03107638955116272, "signal/format_reward/group_std_mean": 0.05420016869902611, "signal/format_reward/group_zero_std_frac": 0.7916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01553819477558136, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01553819477558136, "signal/frontier_aurc_reward/centered_abs_mean": 0.004669300001114607, "signal/frontier_aurc_reward/group_std_mean": 0.00808509076014161, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.358046834473498e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.358046834473498e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.11330873966217041, "signal/frontier_coverage_1/group_std_mean": 0.1614879548549652, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020282263401895763, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020282263401895763, "signal/frontier_coverage_10/centered_abs_mean": 0.10939195156097412, "signal/frontier_coverage_10/group_std_mean": 0.15617263913154603, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019581159343943, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019581159343943, "signal/frontier_coverage_15/centered_abs_mean": 0.09606794416904449, "signal/frontier_coverage_15/group_std_mean": 0.1361277371644974, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017196161206811666, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017196161206811666, "signal/frontier_coverage_20/centered_abs_mean": 0.08031494021415711, "signal/frontier_coverage_20/group_std_mean": 0.10763168931007386, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014376373728737235, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014376373728737235, "signal/frontier_coverage_25/centered_abs_mean": 0.13603066951036452, "signal/frontier_coverage_25/group_std_mean": 0.1738019824028015, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002434948831796646, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002434948831796646, "signal/frontier_coverage_5/centered_abs_mean": 0.11330873966217041, "signal/frontier_coverage_5/group_std_mean": 0.1614879548549652, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020282263401895763, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020282263401895763, "signal/frontier_ece_reward/centered_abs_mean": 0.009635485522449017, "signal/frontier_ece_reward/group_std_mean": 0.013568679615855218, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012044356903061272, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012044356903061272, "step": 185 }, { "calibration/aurc": 0.20223318880699964, "calibration/batch_distribution_entropy": 0.7491589023246843, "calibration/buffer_distribution_entropy": 0.7953545225235228, "calibration/confidence_entropy": 0.38962672073018173, "calibration/coverage@0%": 0.015676818892647274, "calibration/coverage@1%": 0.015676818892647274, "calibration/coverage@10%": 0.20546013155266002, "calibration/coverage@15%": 0.3649979169559784, "calibration/coverage@20%": 0.5088098875156228, "calibration/coverage@25%": 0.8386324237374438, "calibration/coverage@30%": 0.9468667546174142, "calibration/coverage@5%": 0.056930082600219076, "calibration/ece": 0.11801808575784636, "calibration/mean_confidence": 0.7763912756629218, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014930555555555558, "completions/max_length": 3731.2, "completions/max_terminated_length": 3731.2, "completions/mean_length": 864.9324829101563, "completions/mean_terminated_length": 878.110986328125, "completions/min_length": 0.0, "completions/min_terminated_length": 293.6, "epoch": 0.45599430007124914, "grad_norm": 0.00039803129038773477, "learning_rate": 5.421686746987952e-07, "loss": -0.0133, "num_tokens": 449675142.0, "reward": 1.0776456117630004, "reward_std": 0.1367618814110756, "rewards/accuracy_reward": 0.7268229246139526, "rewards/brier_reward": 0.8068367838859558, "rewards/confidence_uniqueness_reward": 0.9222909212112427, "rewards/format_reward": 0.9850694537162781, "rewards/frontier_aurc_reward": -0.003724863426759839, "rewards/frontier_coverage_1": 0.002979634841904044, "rewards/frontier_coverage_10": 0.0035558654461055996, "rewards/frontier_coverage_15": 0.009191408194601536, "rewards/frontier_coverage_20": 0.052768574655056, "rewards/frontier_coverage_25": 0.2101300299167633, "rewards/frontier_coverage_5": 0.002979634841904044, "rewards/frontier_ece_reward": 0.004675363376736641, "signal/accuracy_reward/centered_abs_mean": 0.15314127504825592, "signal/accuracy_reward/group_std_mean": 0.20621364712715148, "signal/accuracy_reward/group_zero_std_frac": 0.3944444417953491, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07657063752412796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07657063752412796, "signal/advantage_abs_mean": 0.09666193425655364, "signal/advantage_pre_scale_abs_mean": 0.09666193425655364, "signal/advantage_pre_scale_std": 0.17675647437572478, "signal/advantage_std": 0.17675647437572478, "signal/brier_reward/centered_abs_mean": 0.13968413770198823, "signal/brier_reward/group_std_mean": 0.1854879915714264, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01746051721274853, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01746051721274853, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04815400689840317, "signal/confidence_uniqueness_reward/group_std_mean": 0.07337165027856826, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006019250862300396, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006019250862300396, "signal/format_reward/centered_abs_mean": 0.02486979179084301, "signal/format_reward/group_std_mean": 0.04627573862671852, "signal/format_reward/group_zero_std_frac": 0.8083333253860474, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012434895895421506, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012434895895421506, "signal/frontier_aurc_reward/centered_abs_mean": 0.004705563187599182, "signal/frontier_aurc_reward/group_std_mean": 0.008424987457692623, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.422957907896488e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.422957907896488e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.11363547295331955, "signal/frontier_coverage_1/group_std_mean": 0.16121746897697448, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020340749295428397, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020340749295428397, "signal/frontier_coverage_10/centered_abs_mean": 0.11250363886356354, "signal/frontier_coverage_10/group_std_mean": 0.15969001799821853, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020138150779530407, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020138150779530407, "signal/frontier_coverage_15/centered_abs_mean": 0.09940378814935684, "signal/frontier_coverage_15/group_std_mean": 0.1407702460885048, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017793278209865093, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017793278209865093, "signal/frontier_coverage_20/centered_abs_mean": 0.07035658955574035, "signal/frontier_coverage_20/group_std_mean": 0.09374350309371948, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012593829305842519, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012593829305842519, "signal/frontier_coverage_25/centered_abs_mean": 0.1395350754261017, "signal/frontier_coverage_25/group_std_mean": 0.1781696170568466, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024976777844130994, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024976777844130994, "signal/frontier_coverage_5/centered_abs_mean": 0.11363547295331955, "signal/frontier_coverage_5/group_std_mean": 0.16121746897697448, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020340749295428397, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020340749295428397, "signal/frontier_ece_reward/centered_abs_mean": 0.009676532447338104, "signal/frontier_ece_reward/group_std_mean": 0.013482803851366043, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001209566555917263, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001209566555917263, "step": 190 }, { "calibration/aurc": 0.18695428884355583, "calibration/batch_distribution_entropy": 0.8294959515627778, "calibration/buffer_distribution_entropy": 0.7830615575341693, "calibration/confidence_entropy": 0.4127994379638137, "calibration/coverage@0%": 0.030000602464743547, "calibration/coverage@1%": 0.030000602464743547, "calibration/coverage@10%": 0.3726668073790661, "calibration/coverage@15%": 0.47507091504284826, "calibration/coverage@20%": 0.6033368934355202, "calibration/coverage@25%": 0.6390882147110964, "calibration/coverage@30%": 0.7727477070519446, "calibration/coverage@5%": 0.14934237172733683, "calibration/ece": 0.12225915444458749, "calibration/mean_confidence": 0.7202413289371199, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02230902777777779, "completions/max_length": 3692.6, "completions/max_terminated_length": 3692.6, "completions/mean_length": 885.7539184570312, "completions/mean_terminated_length": 906.2072998046875, "completions/min_length": 0.0, "completions/min_terminated_length": 246.8, "epoch": 0.46799415007312406, "grad_norm": 0.0003987095842603594, "learning_rate": 3.91566265060241e-07, "loss": -0.019, "num_tokens": 462959891.0, "reward": 1.0415829181671143, "reward_std": 0.14480677247047424, "rewards/accuracy_reward": 0.6694444417953491, "rewards/brier_reward": 0.7821706771850586, "rewards/confidence_uniqueness_reward": 0.9169838309288025, "rewards/format_reward": 0.977430546283722, "rewards/frontier_aurc_reward": -0.003745084721595049, "rewards/frontier_coverage_1": 0.01643837634474039, "rewards/frontier_coverage_10": 0.01651232047006488, "rewards/frontier_coverage_15": 0.019644578453153372, "rewards/frontier_coverage_20": 0.05048965364694595, "rewards/frontier_coverage_25": 0.17186331748962402, "rewards/frontier_coverage_5": 0.01643837634474039, "rewards/frontier_ece_reward": 0.004818407958373428, "signal/accuracy_reward/centered_abs_mean": 0.15354817509651184, "signal/accuracy_reward/group_std_mean": 0.20701175928115845, "signal/accuracy_reward/group_zero_std_frac": 0.3944444417953491, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07677408754825592, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07677408754825592, "signal/advantage_abs_mean": 0.10590840876102448, "signal/advantage_pre_scale_abs_mean": 0.10590840876102448, "signal/advantage_pre_scale_std": 0.1864941358566284, "signal/advantage_std": 0.1864941358566284, "signal/brier_reward/centered_abs_mean": 0.15084939301013947, "signal/brier_reward/group_std_mean": 0.1953912854194641, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018856174126267434, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018856174126267434, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05446743220090866, "signal/confidence_uniqueness_reward/group_std_mean": 0.07849968373775482, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006808429025113583, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006808429025113583, "signal/format_reward/centered_abs_mean": 0.03295355886220932, "signal/format_reward/group_std_mean": 0.05323704555630684, "signal/format_reward/group_zero_std_frac": 0.8083333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01647677943110466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01647677943110466, "signal/frontier_aurc_reward/centered_abs_mean": 0.004789613839238882, "signal/frontier_aurc_reward/group_std_mean": 0.008727262448519468, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.57340870425105e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.57340870425105e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.10987022221088409, "signal/frontier_coverage_1/group_std_mean": 0.16090194880962372, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019666770240291953, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019666770240291953, "signal/frontier_coverage_10/centered_abs_mean": 0.10942451506853104, "signal/frontier_coverage_10/group_std_mean": 0.16031207293272018, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019586987793445585, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019586987793445585, "signal/frontier_coverage_15/centered_abs_mean": 0.09429361820220947, "signal/frontier_coverage_15/group_std_mean": 0.13933112919330598, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001687855739146471, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001687855739146471, "signal/frontier_coverage_20/centered_abs_mean": 0.06689911112189292, "signal/frontier_coverage_20/group_std_mean": 0.09313002228736877, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001197494030930102, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001197494030930102, "signal/frontier_coverage_25/centered_abs_mean": 0.12959368526935577, "signal/frontier_coverage_25/group_std_mean": 0.1694766938686371, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00231972704641521, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00231972704641521, "signal/frontier_coverage_5/centered_abs_mean": 0.10987022221088409, "signal/frontier_coverage_5/group_std_mean": 0.16090194880962372, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019666770240291953, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019666770240291953, "signal/frontier_ece_reward/centered_abs_mean": 0.009920213930308818, "signal/frontier_ece_reward/group_std_mean": 0.014252552576363087, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012400267412886023, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012400267412886023, "step": 195 }, { "calibration/aurc": 0.16469432851019974, "calibration/batch_distribution_entropy": 0.7597396959589091, "calibration/buffer_distribution_entropy": 0.8007669131143469, "calibration/confidence_entropy": 0.3969919203304937, "calibration/coverage@0%": 0.024210526315789474, "calibration/coverage@1%": 0.024210526315789474, "calibration/coverage@10%": 0.30953425634416254, "calibration/coverage@15%": 0.39944743749136624, "calibration/coverage@20%": 0.8420339912280703, "calibration/coverage@25%": 0.9285635964912281, "calibration/coverage@30%": 0.9563157894736843, "calibration/coverage@5%": 0.04684210526315789, "calibration/ece": 0.12112572362046492, "calibration/mean_confidence": 0.7741498976535474, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014496527777777768, "completions/max_length": 3721.0, "completions/max_terminated_length": 3721.0, "completions/mean_length": 864.76953125, "completions/mean_terminated_length": 877.4760375976563, "completions/min_length": 0.0, "completions/min_terminated_length": 265.4, "epoch": 0.47999400007499904, "grad_norm": 0.00045119517017155886, "learning_rate": 2.409638554216868e-07, "loss": -0.0136, "num_tokens": 475989844.0, "reward": 1.0597358465194702, "reward_std": 0.1393290489912033, "rewards/accuracy_reward": 0.6921006798744201, "rewards/brier_reward": 0.7960708856582641, "rewards/confidence_uniqueness_reward": 0.9244546294212341, "rewards/format_reward": 0.9854166746139527, "rewards/frontier_aurc_reward": -0.004364499310031533, "rewards/frontier_coverage_1": 0.016046756226569415, "rewards/frontier_coverage_10": 0.016046756226569415, "rewards/frontier_coverage_15": 0.01778736140113324, "rewards/frontier_coverage_20": 0.04925883784890175, "rewards/frontier_coverage_25": 0.1840776115655899, "rewards/frontier_coverage_5": 0.016046756226569415, "rewards/frontier_ece_reward": 0.005061939358711243, "signal/accuracy_reward/centered_abs_mean": 0.15125325322151184, "signal/accuracy_reward/group_std_mean": 0.2005963295698166, "signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07562662661075592, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07562662661075592, "signal/advantage_abs_mean": 0.10048190951347351, "signal/advantage_pre_scale_abs_mean": 0.10048190951347351, "signal/advantage_pre_scale_std": 0.17941418886184693, "signal/advantage_std": 0.17941418886184693, "signal/brier_reward/centered_abs_mean": 0.14328907430171967, "signal/brier_reward/group_std_mean": 0.18907705545425416, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017911134287714958, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017911134287714958, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04496906101703644, "signal/confidence_uniqueness_reward/group_std_mean": 0.07179959863424301, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005621132627129555, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005621132627129555, "signal/format_reward/centered_abs_mean": 0.02472873292863369, "signal/format_reward/group_std_mean": 0.04880723804235458, "signal/format_reward/group_zero_std_frac": 0.794444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012364366464316845, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012364366464316845, "signal/frontier_aurc_reward/centered_abs_mean": 0.005653998162597418, "signal/frontier_aurc_reward/group_std_mean": 0.010000771470367908, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00010120656515937299, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00010120656515937299, "signal/frontier_coverage_1/centered_abs_mean": 0.10320640355348587, "signal/frontier_coverage_1/group_std_mean": 0.15092136859893798, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018473944626748561, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018473944626748561, "signal/frontier_coverage_10/centered_abs_mean": 0.10320640355348587, "signal/frontier_coverage_10/group_std_mean": 0.15092136859893798, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018473944626748561, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018473944626748561, "signal/frontier_coverage_15/centered_abs_mean": 0.08790723532438278, "signal/frontier_coverage_15/group_std_mean": 0.1300230875611305, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001573539455421269, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001573539455421269, "signal/frontier_coverage_20/centered_abs_mean": 0.07248112261295318, "signal/frontier_coverage_20/group_std_mean": 0.09901486486196517, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012974119978025555, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012974119978025555, "signal/frontier_coverage_25/centered_abs_mean": 0.13645926415920256, "signal/frontier_coverage_25/group_std_mean": 0.17553613483905792, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00244262064807117, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00244262064807117, "signal/frontier_coverage_5/centered_abs_mean": 0.10320640355348587, "signal/frontier_coverage_5/group_std_mean": 0.15092136859893798, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018473944626748561, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018473944626748561, "signal/frontier_ece_reward/centered_abs_mean": 0.009132616408169269, "signal/frontier_ece_reward/group_std_mean": 0.013096104748547078, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011415770510211586, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011415770510211586, "step": 200 }, { "epoch": 0.47999400007499904, "eval_calibration/aurc": 0.1530627466072467, "eval_calibration/batch_distribution_entropy": 0.7084686843773372, "eval_calibration/buffer_distribution_entropy": 0.8127786012531732, "eval_calibration/confidence_entropy": 0.3849499401854783, "eval_calibration/coverage@0%": 0.15826612903225806, "eval_calibration/coverage@1%": 0.15826612903225806, "eval_calibration/coverage@10%": 0.42002688172043007, "eval_calibration/coverage@15%": 0.6705309139784946, "eval_calibration/coverage@20%": 0.9114583333333334, "eval_calibration/coverage@25%": 0.9479166666666666, "eval_calibration/coverage@30%": 0.96875, "eval_calibration/coverage@5%": 0.15826612903225806, "eval_calibration/ece": 0.18083638855035544, "eval_calibration/mean_confidence": 0.7608088995727863, "eval_completions/clipped_ratio": 0.018229166666666668, "eval_completions/max_length": 3308.1666666666665, "eval_completions/max_terminated_length": 3308.1666666666665, "eval_completions/mean_length": 879.684560139974, "eval_completions/mean_terminated_length": 895.6973673502604, "eval_completions/min_length": 65.5, "eval_completions/min_terminated_length": 314.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 475989844.0, "eval_reward": 1.0420524875322978, "eval_reward_std": 0.2861000994841258, "eval_rewards/accuracy_reward": 0.683159718910853, "eval_rewards/brier_reward": 0.7857048312822977, "eval_rewards/confidence_uniqueness_reward": 0.8660669028759003, "eval_rewards/format_reward": 0.9782986144224802, "eval_rewards/frontier_aurc_reward": -0.00444137768742318, "eval_rewards/frontier_coverage_1": 0.012749733519740403, "eval_rewards/frontier_coverage_10": 0.012749733519740403, "eval_rewards/frontier_coverage_15": 0.01225113959905381, "eval_rewards/frontier_coverage_20": 0.0417734415580829, "eval_rewards/frontier_coverage_25": 0.1467849425971508, "eval_rewards/frontier_coverage_5": 0.012749733519740403, "eval_rewards/frontier_ece_reward": 0.005217449079888563, "eval_runtime": 218.0299, "eval_samples_per_second": 4.587, "eval_signal/accuracy_reward/centered_abs_mean": 0.42138671875, "eval_signal/accuracy_reward/group_std_mean": 0.4653974175453186, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.210693359375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.210693359375, "eval_signal/advantage_abs_mean": 0.24256180475155512, "eval_signal/advantage_pre_scale_abs_mean": 0.24256180475155512, "eval_signal/advantage_pre_scale_std": 0.285640483101209, "eval_signal/advantage_std": 0.285640483101209, "eval_signal/brier_reward/centered_abs_mean": 0.2427068774898847, "eval_signal/brier_reward/group_std_mean": 0.3084608018398285, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030338359686235588, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.030338359686235588, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0698917464663585, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1197894203166167, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008736468308294812, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008736468308294812, "eval_signal/format_reward/centered_abs_mean": 0.04085286473855376, "eval_signal/format_reward/group_std_mean": 0.09480424628903468, "eval_signal/format_reward/group_zero_std_frac": 0.5555555721124014, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.02042643236927688, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.02042643236927688, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.00753099766249458, "eval_signal/frontier_aurc_reward/group_std_mean": 0.016231194448967774, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00013480485601273054, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00013480485601273054, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.1466109355290731, "eval_signal/frontier_coverage_1/group_std_mean": 0.2583803633848826, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026243358151987195, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026243358151987195, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.1466109355290731, "eval_signal/frontier_coverage_10/group_std_mean": 0.2583803633848826, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026243358151987195, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026243358151987195, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.12884864211082458, "eval_signal/frontier_coverage_15/group_std_mean": 0.23108715812365213, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002306390592517952, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002306390592517952, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.10982246572772662, "eval_signal/frontier_coverage_20/group_std_mean": 0.16773877292871475, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019658220505031445, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019658220505031445, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.21963715553283691, "eval_signal/frontier_coverage_25/group_std_mean": 0.26794109493494034, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003931504984696706, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003931504984696706, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.1466109355290731, "eval_signal/frontier_coverage_5/group_std_mean": 0.2583803633848826, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026243358151987195, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026243358151987195, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.011491452964643637, "eval_signal/frontier_ece_reward/group_std_mean": 0.01822316941494743, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014364316205804546, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014364316205804546, "eval_steps_per_second": 0.028, "step": 200 }, { "calibration/aurc": 0.21459663503852902, "calibration/batch_distribution_entropy": 0.771657696018665, "calibration/buffer_distribution_entropy": 0.8062133912870209, "calibration/confidence_entropy": 0.39023679136762507, "calibration/coverage@0%": 0.005278506853310003, "calibration/coverage@1%": 0.005278506853310003, "calibration/coverage@10%": 0.07723617881098196, "calibration/coverage@15%": 0.33807708327146657, "calibration/coverage@20%": 0.4888472274299046, "calibration/coverage@25%": 0.7491479359146425, "calibration/coverage@30%": 0.9136024663583718, "calibration/coverage@5%": 0.005278506853310003, "calibration/ece": 0.12685333087756695, "calibration/mean_confidence": 0.7578144496622505, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015451388888888884, "completions/max_length": 3512.2, "completions/max_terminated_length": 3512.2, "completions/mean_length": 889.34462890625, "completions/mean_terminated_length": 903.3022705078125, "completions/min_length": 0.0, "completions/min_terminated_length": 253.0, "epoch": 0.491993850076874, "grad_norm": 0.0003861828299704939, "learning_rate": 9.036144578313253e-08, "loss": -0.0133, "num_tokens": 489301046.0, "reward": 1.0838479042053222, "reward_std": 0.13164580911397933, "rewards/accuracy_reward": 0.7376736283302308, "rewards/brier_reward": 0.817084789276123, "rewards/confidence_uniqueness_reward": 0.922769570350647, "rewards/format_reward": 0.9843750119209289, "rewards/frontier_aurc_reward": -0.0028617044910788534, "rewards/frontier_coverage_1": 0.0036607160232961177, "rewards/frontier_coverage_10": 0.0036607160232961177, "rewards/frontier_coverage_15": 0.008217979548498988, "rewards/frontier_coverage_20": 0.04692377373576164, "rewards/frontier_coverage_25": 0.19758630394935608, "rewards/frontier_coverage_5": 0.0036607160232961177, "rewards/frontier_ece_reward": 0.0053809239529073235, "signal/accuracy_reward/centered_abs_mean": 0.14232856035232544, "signal/accuracy_reward/group_std_mean": 0.19213833510875702, "signal/accuracy_reward/group_zero_std_frac": 0.44722222089767455, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07116428017616272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07116428017616272, "signal/advantage_abs_mean": 0.09252178370952606, "signal/advantage_pre_scale_abs_mean": 0.09252178370952606, "signal/advantage_pre_scale_std": 0.17391646802425384, "signal/advantage_std": 0.17391646802425384, "signal/brier_reward/centered_abs_mean": 0.13427632600069045, "signal/brier_reward/group_std_mean": 0.1779688835144043, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016784540750086306, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016784540750086306, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04697767570614815, "signal/confidence_uniqueness_reward/group_std_mean": 0.07451951429247856, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005872209463268519, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005872209463268519, "signal/format_reward/centered_abs_mean": 0.025531684421002863, "signal/format_reward/group_std_mean": 0.04952741749584675, "signal/format_reward/group_zero_std_frac": 0.7916666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012765842210501432, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012765842210501432, "signal/frontier_aurc_reward/centered_abs_mean": 0.0039027729537338017, "signal/frontier_aurc_reward/group_std_mean": 0.00706147076562047, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.985963555052876e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.985963555052876e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.11220380365848541, "signal/frontier_coverage_1/group_std_mean": 0.15874993205070495, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020084480987861753, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020084480987861753, "signal/frontier_coverage_10/centered_abs_mean": 0.11220380365848541, "signal/frontier_coverage_10/group_std_mean": 0.15874993205070495, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020084480987861753, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020084480987861753, "signal/frontier_coverage_15/centered_abs_mean": 0.09757517576217652, "signal/frontier_coverage_15/group_std_mean": 0.13811067789793013, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017465956043452024, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017465956043452024, "signal/frontier_coverage_20/centered_abs_mean": 0.0743746891617775, "signal/frontier_coverage_20/group_std_mean": 0.10126109570264816, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013313068542629481, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013313068542629481, "signal/frontier_coverage_25/centered_abs_mean": 0.12186015099287033, "signal/frontier_coverage_25/group_std_mean": 0.15898796319961547, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002181296655908227, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002181296655908227, "signal/frontier_coverage_5/centered_abs_mean": 0.11220380365848541, "signal/frontier_coverage_5/group_std_mean": 0.15874993205070495, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020084480987861753, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020084480987861753, "signal/frontier_ece_reward/centered_abs_mean": 0.009247669950127602, "signal/frontier_ece_reward/group_std_mean": 0.01250618938356638, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011559587437659502, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011559587437659502, "step": 205 }, { "calibration/aurc": 0.1501536281365257, "calibration/batch_distribution_entropy": 0.7866720396909507, "calibration/buffer_distribution_entropy": 0.8006953548191152, "calibration/confidence_entropy": 0.4023190237959316, "calibration/coverage@0%": 0.04461942257217847, "calibration/coverage@1%": 0.04461942257217847, "calibration/coverage@10%": 0.2860892388451443, "calibration/coverage@15%": 0.458953429250663, "calibration/coverage@20%": 0.7403910898572234, "calibration/coverage@25%": 0.9528406462281219, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.19072615923009625, "calibration/ece": 0.08832369333942824, "calibration/mean_confidence": 0.7594015624133713, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011574074074074106, "completions/max_length": 3409.3333333333335, "completions/max_terminated_length": 3409.3333333333335, "completions/mean_length": 896.012451171875, "completions/mean_terminated_length": 906.8702392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 265.3333333333333, "epoch": 0.49919376007799904, "num_tokens": 497360332.0, "reward": 1.0603783528010051, "reward_std": 0.13254794230063757, "rewards/accuracy_reward": 0.6896701256434122, "rewards/brier_reward": 0.7949030995368958, "rewards/confidence_uniqueness_reward": 0.9295975764592489, "rewards/format_reward": 0.9884259303410848, "rewards/frontier_aurc_reward": -0.00355257714788119, "rewards/frontier_coverage_1": 0.011394298480202755, "rewards/frontier_coverage_10": 0.010870846764494976, "rewards/frontier_coverage_15": 0.014215116699536642, "rewards/frontier_coverage_20": 0.05826817204554876, "rewards/frontier_coverage_25": 0.19660960137844086, "rewards/frontier_coverage_5": 0.011394298480202755, "rewards/frontier_ece_reward": 0.0032965668166677156, "signal/accuracy_reward/centered_abs_mean": 0.15244321525096893, "signal/accuracy_reward/group_std_mean": 0.2041437178850174, "signal/accuracy_reward/group_zero_std_frac": 0.4166666865348816, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07622160762548447, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07622160762548447, "signal/advantage_abs_mean": 0.09584795186916988, "signal/advantage_pre_scale_abs_mean": 0.09584795186916988, "signal/advantage_pre_scale_std": 0.16963096956411997, "signal/advantage_std": 0.16963096956411997, "signal/brier_reward/centered_abs_mean": 0.14546114454666773, "signal/brier_reward/group_std_mean": 0.1875475843747457, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018182643068333466, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018182643068333466, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.041573978339632355, "signal/confidence_uniqueness_reward/group_std_mean": 0.06479879096150398, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005196747292454044, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005196747292454044, "signal/format_reward/centered_abs_mean": 0.020055700559169054, "signal/format_reward/group_std_mean": 0.03889835067093372, "signal/format_reward/group_zero_std_frac": 0.8379629651705424, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010027850279584527, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010027850279584527, "signal/frontier_aurc_reward/centered_abs_mean": 0.00507251297434171, "signal/frontier_aurc_reward/group_std_mean": 0.008938198909163475, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.07979726131695e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.07979726131695e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.11825272192557652, "signal/frontier_coverage_1/group_std_mean": 0.16622615853945413, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002116723839814464, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002116723839814464, "signal/frontier_coverage_10/centered_abs_mean": 0.11266253391901652, "signal/frontier_coverage_10/group_std_mean": 0.15934370954831442, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002016659282768766, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002016659282768766, "signal/frontier_coverage_15/centered_abs_mean": 0.09346077839533488, "signal/frontier_coverage_15/group_std_mean": 0.13282916943232217, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016729478569080432, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016729478569080432, "signal/frontier_coverage_20/centered_abs_mean": 0.07203005999326706, "signal/frontier_coverage_20/group_std_mean": 0.09641539553801219, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012893380674843986, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012893380674843986, "signal/frontier_coverage_25/centered_abs_mean": 0.13922655334075293, "signal/frontier_coverage_25/group_std_mean": 0.17964440087477365, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024921553364644447, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024921553364644447, "signal/frontier_coverage_5/centered_abs_mean": 0.11825272192557652, "signal/frontier_coverage_5/group_std_mean": 0.16622615853945413, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002116723839814464, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002116723839814464, "signal/frontier_ece_reward/centered_abs_mean": 0.009944108004371325, "signal/frontier_ece_reward/group_std_mean": 0.013924115958313147, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012430135005464156, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012430135005464156, "step": 208, "total_flos": 0.0, "train_loss": -0.00542832244760715, "train_runtime": 18893.3969, "train_samples_per_second": 0.794, "train_steps_per_second": 0.011 } ], "logging_steps": 5, "max_steps": 208, "num_input_tokens_seen": 497360332, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }