{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.49919376007799904, "eval_steps": 50, "global_step": 208, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.36967823482088125, "calibration/batch_distribution_entropy": 0.7862026048473938, "calibration/confidence_entropy": 0.29413324102623034, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.08602150537634409, "calibration/coverage@20%": 0.11326164874551972, "calibration/coverage@25%": 0.13189964157706094, "calibration/coverage@30%": 0.16774193548387098, "calibration/coverage@5%": 0.0, "calibration/ece": 0.24784627572046558, "calibration/mean_confidence": 0.6899520713084326, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.043923611111111115, "completions/max_length": 4052.0, "completions/max_terminated_length": 4052.0, "completions/mean_length": 1002.0799560546875, "completions/mean_terminated_length": 1048.1506591796874, "completions/min_length": 0.0, "completions/min_terminated_length": 215.0, "epoch": 0.011999850001874977, "grad_norm": 0.0019047551322728395, "learning_rate": 5.952380952380953e-07, "loss": -0.0216, "num_tokens": 14658169.0, "reward": 0.7684701919555664, "reward_std": 0.5013577401638031, "rewards/accuracy_reward": 0.34930555820465087, "rewards/brier_reward": 0.49733580350875856, "rewards/confidence_uniqueness_reward": 0.49042509198188783, "rewards/format_reward": 0.7189236164093018, "rewards/frontier_aurc_reward": 0.4430105030536652, "rewards/frontier_coverage_1": 0.4430105030536652, "rewards/frontier_coverage_10": 0.4430105030536652, "rewards/frontier_coverage_15": 0.4430105030536652, "rewards/frontier_coverage_20": 0.4430105030536652, "rewards/frontier_coverage_25": 0.4430105030536652, "rewards/frontier_coverage_5": 0.4430105030536652, "rewards/frontier_ece_reward": 0.4430105030536652, "signal/accuracy_reward/centered_abs_mean": 0.3296006917953491, "signal/accuracy_reward/group_std_mean": 0.38530040979385377, "signal/accuracy_reward/group_zero_std_frac": 0.07500000223517418, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16480034589767456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.16480034589767456, "signal/advantage_abs_mean": 0.42119367718696593, "signal/advantage_pre_scale_abs_mean": 0.42119367718696593, "signal/advantage_pre_scale_std": 0.502716064453125, "signal/advantage_std": 0.502716064453125, "signal/brier_reward/centered_abs_mean": 0.3650480568408966, "signal/brier_reward/group_std_mean": 0.4109442591667175, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04563100710511207, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.04563100710511207, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.33325800895690916, "signal/confidence_uniqueness_reward/group_std_mean": 0.37558268308639525, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.041657251119613645, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.041657251119613645, "signal/format_reward/centered_abs_mean": 0.3422960102558136, "signal/format_reward/group_std_mean": 0.41390294432640073, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1711480051279068, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1711480051279068, "signal/frontier_aurc_reward/centered_abs_mean": 0.36038182973861693, "signal/frontier_aurc_reward/group_std_mean": 0.4062395691871643, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.006450834404677153, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.006450834404677153, "signal/frontier_coverage_1/centered_abs_mean": 0.36038182973861693, "signal/frontier_coverage_1/group_std_mean": 0.4062395691871643, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006450834404677153, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006450834404677153, "signal/frontier_coverage_10/centered_abs_mean": 0.36038182973861693, "signal/frontier_coverage_10/group_std_mean": 0.4062395691871643, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006450834404677153, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006450834404677153, "signal/frontier_coverage_15/centered_abs_mean": 0.36038182973861693, "signal/frontier_coverage_15/group_std_mean": 0.4062395691871643, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006450834404677153, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006450834404677153, "signal/frontier_coverage_20/centered_abs_mean": 0.36038182973861693, "signal/frontier_coverage_20/group_std_mean": 0.4062395691871643, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006450834404677153, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006450834404677153, "signal/frontier_coverage_25/centered_abs_mean": 0.36038182973861693, "signal/frontier_coverage_25/group_std_mean": 0.4062395691871643, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006450834404677153, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006450834404677153, "signal/frontier_coverage_5/centered_abs_mean": 0.36038182973861693, "signal/frontier_coverage_5/group_std_mean": 0.4062395691871643, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006450834404677153, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006450834404677153, "signal/frontier_ece_reward/centered_abs_mean": 0.36038182973861693, "signal/frontier_ece_reward/group_std_mean": 0.4062395691871643, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.04504772871732712, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.04504772871732712, "step": 5 }, { "calibration/aurc": 0.32589507602858425, "calibration/batch_distribution_entropy": 0.7834614298983764, "calibration/confidence_entropy": 0.3043506317779947, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.06793650793650793, "calibration/coverage@20%": 0.11428571428571428, "calibration/coverage@25%": 0.3277179106936893, "calibration/coverage@30%": 0.4927966937433446, "calibration/coverage@5%": 0.0, "calibration/ece": 0.24205344081355856, "calibration/mean_confidence": 0.6932854483595139, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04166666666666667, "completions/max_length": 3819.0, "completions/max_terminated_length": 3819.0, "completions/mean_length": 964.8975708007813, "completions/mean_terminated_length": 1007.06064453125, "completions/min_length": 0.0, "completions/min_terminated_length": 243.4, "epoch": 0.023999700003749954, "grad_norm": 0.0010482864454388618, "learning_rate": 1.1904761904761906e-06, "loss": -0.0304, "num_tokens": 28856509.0, "reward": 0.8811465501785278, "reward_std": 0.44658924341201783, "rewards/accuracy_reward": 0.4013020873069763, "rewards/brier_reward": 0.5648763060569764, "rewards/confidence_uniqueness_reward": 0.5875549912452698, "rewards/format_reward": 0.8222222208976746, "rewards/frontier_aurc_reward": 0.5007211208343506, "rewards/frontier_coverage_1": 0.5007211208343506, "rewards/frontier_coverage_10": 0.5007211208343506, "rewards/frontier_coverage_15": 0.5007211208343506, "rewards/frontier_coverage_20": 0.5007211208343506, "rewards/frontier_coverage_25": 0.5007211208343506, "rewards/frontier_coverage_5": 0.5007211208343506, "rewards/frontier_ece_reward": 0.5007211208343506, "signal/accuracy_reward/centered_abs_mean": 0.3191786050796509, "signal/accuracy_reward/group_std_mean": 0.3830325841903687, "signal/accuracy_reward/group_zero_std_frac": 0.05555555615574122, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15958930253982545, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15958930253982545, "signal/advantage_abs_mean": 0.35644559264183046, "signal/advantage_pre_scale_abs_mean": 0.35644559264183046, "signal/advantage_pre_scale_std": 0.4495138943195343, "signal/advantage_std": 0.4495138943195343, "signal/brier_reward/centered_abs_mean": 0.3444161355495453, "signal/brier_reward/group_std_mean": 0.39449760913848875, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.043052016943693164, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.043052016943693164, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.3033804833889008, "signal/confidence_uniqueness_reward/group_std_mean": 0.34619358777999876, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0379225604236126, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0379225604236126, "signal/format_reward/centered_abs_mean": 0.2525716066360474, "signal/format_reward/group_std_mean": 0.3450364053249359, "signal/format_reward/group_zero_std_frac": 0.03333333358168602, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1262858033180237, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1262858033180237, "signal/frontier_aurc_reward/centered_abs_mean": 0.35025461912155154, "signal/frontier_aurc_reward/group_std_mean": 0.39719846844673157, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0062695578671991825, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0062695578671991825, "signal/frontier_coverage_1/centered_abs_mean": 0.35025461912155154, "signal/frontier_coverage_1/group_std_mean": 0.39719846844673157, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0062695578671991825, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0062695578671991825, "signal/frontier_coverage_10/centered_abs_mean": 0.35025461912155154, "signal/frontier_coverage_10/group_std_mean": 0.39719846844673157, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0062695578671991825, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0062695578671991825, "signal/frontier_coverage_15/centered_abs_mean": 0.35025461912155154, "signal/frontier_coverage_15/group_std_mean": 0.39719846844673157, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0062695578671991825, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0062695578671991825, "signal/frontier_coverage_20/centered_abs_mean": 0.35025461912155154, "signal/frontier_coverage_20/group_std_mean": 0.39719846844673157, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0062695578671991825, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0062695578671991825, "signal/frontier_coverage_25/centered_abs_mean": 0.35025461912155154, "signal/frontier_coverage_25/group_std_mean": 0.39719846844673157, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0062695578671991825, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0062695578671991825, "signal/frontier_coverage_5/centered_abs_mean": 0.35025461912155154, "signal/frontier_coverage_5/group_std_mean": 0.39719846844673157, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0062695578671991825, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0062695578671991825, "signal/frontier_ece_reward/centered_abs_mean": 0.35025461912155154, "signal/frontier_ece_reward/group_std_mean": 0.39719846844673157, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.04378182739019394, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.04378182739019394, "step": 10 }, { "calibration/aurc": 0.381261368050961, "calibration/batch_distribution_entropy": 0.8395895245268612, "calibration/confidence_entropy": 0.3122351741378401, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.06910112359550562, "calibration/coverage@20%": 0.1, "calibration/coverage@25%": 0.1846394737670349, "calibration/coverage@30%": 0.31916833953903984, "calibration/coverage@5%": 0.0, "calibration/ece": 0.23610914584010007, "calibration/mean_confidence": 0.6068558516881933, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04366319444444444, "completions/max_length": 4003.8, "completions/max_terminated_length": 4003.8, "completions/mean_length": 960.4666748046875, "completions/mean_terminated_length": 1004.5089233398437, "completions/min_length": 0.0, "completions/min_terminated_length": 264.4, "epoch": 0.03599955000562493, "grad_norm": 0.0007612162153236568, "learning_rate": 1.7857142857142859e-06, "loss": -0.0424, "num_tokens": 43023069.0, "reward": 0.9628929138183594, "reward_std": 0.3616787016391754, "rewards/accuracy_reward": 0.39479166865348814, "rewards/brier_reward": 0.6317438364028931, "rewards/confidence_uniqueness_reward": 0.7069078326225281, "rewards/format_reward": 0.9173611164093017, "rewards/frontier_aurc_reward": 0.5572714924812316, "rewards/frontier_coverage_1": 0.5572714924812316, "rewards/frontier_coverage_10": 0.5572714924812316, "rewards/frontier_coverage_15": 0.5572714924812316, "rewards/frontier_coverage_20": 0.5572714924812316, "rewards/frontier_coverage_25": 0.5572714924812316, "rewards/frontier_coverage_5": 0.5572714924812316, "rewards/frontier_ece_reward": 0.5572714924812316, "signal/accuracy_reward/centered_abs_mean": 0.3097439229488373, "signal/accuracy_reward/group_std_mean": 0.3701206386089325, "signal/accuracy_reward/group_zero_std_frac": 0.09444444626569748, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15487196147441865, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15487196147441865, "signal/advantage_abs_mean": 0.27835277616977694, "signal/advantage_pre_scale_abs_mean": 0.27835277616977694, "signal/advantage_pre_scale_std": 0.36725740432739257, "signal/advantage_std": 0.36725740432739257, "signal/brier_reward/centered_abs_mean": 0.3213726282119751, "signal/brier_reward/group_std_mean": 0.3753271162509918, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04017157852649689, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.04017157852649689, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.21762884259223939, "signal/confidence_uniqueness_reward/group_std_mean": 0.27037290930747987, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027203605324029923, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.027203605324029923, "signal/format_reward/centered_abs_mean": 0.13908420354127884, "signal/format_reward/group_std_mean": 0.23201583325862885, "signal/format_reward/group_zero_std_frac": 0.1833333358168602, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06954210177063942, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.06954210177063942, "signal/frontier_aurc_reward/centered_abs_mean": 0.3441716134548187, "signal/frontier_aurc_reward/group_std_mean": 0.3906739056110382, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.006160671729594469, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.006160671729594469, "signal/frontier_coverage_1/centered_abs_mean": 0.3441716134548187, "signal/frontier_coverage_1/group_std_mean": 0.3906739056110382, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006160671729594469, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006160671729594469, "signal/frontier_coverage_10/centered_abs_mean": 0.3441716134548187, "signal/frontier_coverage_10/group_std_mean": 0.3906739056110382, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006160671729594469, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006160671729594469, "signal/frontier_coverage_15/centered_abs_mean": 0.3441716134548187, "signal/frontier_coverage_15/group_std_mean": 0.3906739056110382, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006160671729594469, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006160671729594469, "signal/frontier_coverage_20/centered_abs_mean": 0.3441716134548187, "signal/frontier_coverage_20/group_std_mean": 0.3906739056110382, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006160671729594469, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006160671729594469, "signal/frontier_coverage_25/centered_abs_mean": 0.3441716134548187, "signal/frontier_coverage_25/group_std_mean": 0.3906739056110382, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006160671729594469, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006160671729594469, "signal/frontier_coverage_5/centered_abs_mean": 0.3441716134548187, "signal/frontier_coverage_5/group_std_mean": 0.3906739056110382, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006160671729594469, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006160671729594469, "signal/frontier_ece_reward/centered_abs_mean": 0.3441716134548187, "signal/frontier_ece_reward/group_std_mean": 0.3906739056110382, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.04302145168185234, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.04302145168185234, "step": 15 }, { "calibration/aurc": 0.4027054789391406, "calibration/batch_distribution_entropy": 0.9028449005084616, "calibration/buffer_distribution_entropy": 0.8350432189471256, "calibration/confidence_entropy": 0.3532060861660855, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.05249343832020997, "calibration/coverage@20%": 0.17029932428659184, "calibration/coverage@25%": 0.2729503870355258, "calibration/coverage@30%": 0.333190300423154, "calibration/coverage@5%": 0.0, "calibration/ece": 0.22552573548401694, "calibration/mean_confidence": 0.49134779142387586, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03498263888888886, "completions/max_length": 3970.4, "completions/max_terminated_length": 3970.4, "completions/mean_length": 953.0729248046875, "completions/mean_terminated_length": 987.8788208007812, "completions/min_length": 0.0, "completions/min_terminated_length": 258.4, "epoch": 0.04799940000749991, "grad_norm": 0.0005136204999871552, "learning_rate": 2.380952380952381e-06, "loss": -0.0324, "num_tokens": 57116165.0, "reward": 0.9295851826667786, "reward_std": 0.23658435344696044, "rewards/accuracy_reward": 0.4424479126930237, "rewards/brier_reward": 0.6920093059539795, "rewards/confidence_uniqueness_reward": 0.7875280380249023, "rewards/format_reward": 0.96015625, "rewards/frontier_aurc_reward": 0.11949877790175378, "rewards/frontier_coverage_1": 0.22551958113908768, "rewards/frontier_coverage_10": 0.22551958113908768, "rewards/frontier_coverage_15": 0.22551958113908768, "rewards/frontier_coverage_20": 0.22551958113908768, "rewards/frontier_coverage_25": 0.22551958113908768, "rewards/frontier_coverage_5": 0.22551958113908768, "rewards/frontier_ece_reward": 0.1358485657721758, "signal/accuracy_reward/centered_abs_mean": 0.2820800840854645, "signal/accuracy_reward/group_std_mean": 0.34979713559150694, "signal/accuracy_reward/group_zero_std_frac": 0.10000000149011612, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14104004204273224, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14104004204273224, "signal/advantage_abs_mean": 0.1771962672472, "signal/advantage_pre_scale_abs_mean": 0.1771962672472, "signal/advantage_pre_scale_std": 0.24651205241680146, "signal/advantage_std": 0.24651205241680146, "signal/brier_reward/centered_abs_mean": 0.2946962535381317, "signal/brier_reward/group_std_mean": 0.350597482919693, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.036837031692266466, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.036837031692266466, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.15454766154289246, "signal/confidence_uniqueness_reward/group_std_mean": 0.19427459239959716, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019318457692861557, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.019318457692861557, "signal/format_reward/centered_abs_mean": 0.06775716096162795, "signal/format_reward/group_std_mean": 0.12582100331783294, "signal/format_reward/group_zero_std_frac": 0.4916666805744171, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.03387858048081398, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.03387858048081398, "signal/frontier_aurc_reward/centered_abs_mean": 0.07141957976855337, "signal/frontier_aurc_reward/group_std_mean": 0.08215194744989276, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0012784103186277208, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0012784103186277208, "signal/frontier_coverage_1/centered_abs_mean": 0.3562255322933197, "signal/frontier_coverage_1/group_std_mean": 0.45040944814682005, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006376436538994312, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006376436538994312, "signal/frontier_coverage_10/centered_abs_mean": 0.3562255322933197, "signal/frontier_coverage_10/group_std_mean": 0.45040944814682005, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006376436538994312, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006376436538994312, "signal/frontier_coverage_15/centered_abs_mean": 0.3562255322933197, "signal/frontier_coverage_15/group_std_mean": 0.45040944814682005, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006376436538994312, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006376436538994312, "signal/frontier_coverage_20/centered_abs_mean": 0.3562255322933197, "signal/frontier_coverage_20/group_std_mean": 0.45040944814682005, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006376436538994312, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006376436538994312, "signal/frontier_coverage_25/centered_abs_mean": 0.3562255322933197, "signal/frontier_coverage_25/group_std_mean": 0.45040944814682005, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006376436538994312, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006376436538994312, "signal/frontier_coverage_5/centered_abs_mean": 0.3562255322933197, "signal/frontier_coverage_5/group_std_mean": 0.45040944814682005, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006376436538994312, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006376436538994312, "signal/frontier_ece_reward/centered_abs_mean": 0.1151403695344925, "signal/frontier_ece_reward/group_std_mean": 0.13498959243297576, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.014392546191811562, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.014392546191811562, "step": 20 }, { "calibration/aurc": 0.3614334508581706, "calibration/batch_distribution_entropy": 0.9248183320981849, "calibration/buffer_distribution_entropy": 0.873212498255721, "calibration/confidence_entropy": 0.4128546769676536, "calibration/coverage@0%": 0.014285714285714285, "calibration/coverage@1%": 0.014285714285714285, "calibration/coverage@10%": 0.09585753000387147, "calibration/coverage@15%": 0.14609627048651438, "calibration/coverage@20%": 0.1788488837269325, "calibration/coverage@25%": 0.24317976513098466, "calibration/coverage@30%": 0.37461607949412823, "calibration/coverage@5%": 0.024867724867724865, "calibration/ece": 0.24605077384370877, "calibration/mean_confidence": 0.38649038439891786, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02725694444444442, "completions/max_length": 3480.2, "completions/max_terminated_length": 3480.2, "completions/mean_length": 929.60078125, "completions/mean_terminated_length": 955.7419311523438, "completions/min_length": 0.0, "completions/min_terminated_length": 270.0, "epoch": 0.05999925000937488, "grad_norm": 0.00046045094495639205, "learning_rate": 2.9761904761904763e-06, "loss": -0.025, "num_tokens": 70949614.0, "reward": 0.9276612401008606, "reward_std": 0.19562409222126007, "rewards/accuracy_reward": 0.47786458134651183, "rewards/brier_reward": 0.6938265442848206, "rewards/confidence_uniqueness_reward": 0.8463589429855347, "rewards/format_reward": 0.9715277791023255, "rewards/frontier_aurc_reward": -0.003473227610811591, "rewards/frontier_coverage_1": 0.08209572061896324, "rewards/frontier_coverage_10": 0.08209572061896324, "rewards/frontier_coverage_15": 0.08209572061896324, "rewards/frontier_coverage_20": 0.08209572061896324, "rewards/frontier_coverage_25": 0.08209572061896324, "rewards/frontier_coverage_5": 0.08209572061896324, "rewards/frontier_ece_reward": 0.013495722971856593, "signal/accuracy_reward/centered_abs_mean": 0.293115234375, "signal/accuracy_reward/group_std_mean": 0.36036287546157836, "signal/accuracy_reward/group_zero_std_frac": 0.07777778003364802, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1465576171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1465576171875, "signal/advantage_abs_mean": 0.14612423181533812, "signal/advantage_pre_scale_abs_mean": 0.14612423181533812, "signal/advantage_pre_scale_std": 0.2055502027273178, "signal/advantage_std": 0.2055502027273178, "signal/brier_reward/centered_abs_mean": 0.27127314209938047, "signal/brier_reward/group_std_mean": 0.32418668270111084, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03390914276242256, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03390914276242256, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09788585007190705, "signal/confidence_uniqueness_reward/group_std_mean": 0.13583073616027833, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01223573125898838, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01223573125898838, "signal/format_reward/centered_abs_mean": 0.04701605886220932, "signal/format_reward/group_std_mean": 0.08671137690544128, "signal/format_reward/group_zero_std_frac": 0.6472222447395325, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02350802943110466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02350802943110466, "signal/frontier_aurc_reward/centered_abs_mean": 0.002270214632153511, "signal/frontier_aurc_reward/group_std_mean": 0.0033901261631399393, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0636840276420115e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0636840276420115e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.39322843551635744, "signal/frontier_coverage_1/group_std_mean": 0.48915119767189025, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.007038788590580225, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.007038788590580225, "signal/frontier_coverage_10/centered_abs_mean": 0.39322843551635744, "signal/frontier_coverage_10/group_std_mean": 0.48915119767189025, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.007038788590580225, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.007038788590580225, "signal/frontier_coverage_15/centered_abs_mean": 0.39322843551635744, "signal/frontier_coverage_15/group_std_mean": 0.48915119767189025, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007038788590580225, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.007038788590580225, "signal/frontier_coverage_20/centered_abs_mean": 0.39322843551635744, "signal/frontier_coverage_20/group_std_mean": 0.48915119767189025, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.007038788590580225, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.007038788590580225, "signal/frontier_coverage_25/centered_abs_mean": 0.39322843551635744, "signal/frontier_coverage_25/group_std_mean": 0.48915119767189025, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.007038788590580225, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.007038788590580225, "signal/frontier_coverage_5/centered_abs_mean": 0.39322843551635744, "signal/frontier_coverage_5/group_std_mean": 0.48915119767189025, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.007038788590580225, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.007038788590580225, "signal/frontier_ece_reward/centered_abs_mean": 0.04797838628292084, "signal/frontier_ece_reward/group_std_mean": 0.061146382987499234, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005997298285365105, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005997298285365105, "step": 25 }, { "calibration/aurc": 0.2923284819883589, "calibration/batch_distribution_entropy": 0.9250302282912619, "calibration/buffer_distribution_entropy": 0.9063994573641947, "calibration/confidence_entropy": 0.4841688589725296, "calibration/coverage@0%": 0.008497994354479274, "calibration/coverage@1%": 0.008497994354479274, "calibration/coverage@10%": 0.05956929646528768, "calibration/coverage@15%": 0.10252254939375434, "calibration/coverage@20%": 0.19076890769300306, "calibration/coverage@25%": 0.3561873535093358, "calibration/coverage@30%": 0.5256556345399412, "calibration/coverage@5%": 0.008497994354479274, "calibration/ece": 0.21817575367042902, "calibration/mean_confidence": 0.3925755252888396, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025520833333333305, "completions/max_length": 3789.4, "completions/max_terminated_length": 3789.4, "completions/mean_length": 903.4365356445312, "completions/mean_terminated_length": 927.2119506835937, "completions/min_length": 0.0, "completions/min_terminated_length": 270.6, "epoch": 0.07199910001124986, "grad_norm": 0.0004290464275982231, "learning_rate": 3.5714285714285718e-06, "loss": -0.0226, "num_tokens": 84467123.0, "reward": 0.9338800191879273, "reward_std": 0.1860422283411026, "rewards/accuracy_reward": 0.4858506917953491, "rewards/brier_reward": 0.7087279558181763, "rewards/confidence_uniqueness_reward": 0.8569933891296386, "rewards/format_reward": 0.9730902910232544, "rewards/frontier_aurc_reward": -0.0031799635384231807, "rewards/frontier_coverage_1": 0.06800358705222606, "rewards/frontier_coverage_10": 0.06800358705222606, "rewards/frontier_coverage_15": 0.06800358705222606, "rewards/frontier_coverage_20": 0.06800358705222606, "rewards/frontier_coverage_25": 0.06800358705222606, "rewards/frontier_coverage_5": 0.06800358705222606, "rewards/frontier_ece_reward": 0.01158127374947071, "signal/accuracy_reward/centered_abs_mean": 0.2845431864261627, "signal/accuracy_reward/group_std_mean": 0.35211129784584044, "signal/accuracy_reward/group_zero_std_frac": 0.08888889104127884, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14227159321308136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14227159321308136, "signal/advantage_abs_mean": 0.13841767907142638, "signal/advantage_pre_scale_abs_mean": 0.13841767907142638, "signal/advantage_pre_scale_std": 0.19867367446422576, "signal/advantage_std": 0.19867367446422576, "signal/brier_reward/centered_abs_mean": 0.2366869866847992, "signal/brier_reward/group_std_mean": 0.28489047288894653, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0295858733355999, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0295858733355999, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09566812962293625, "signal/confidence_uniqueness_reward/group_std_mean": 0.13221579790115356, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011958516202867031, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011958516202867031, "signal/format_reward/centered_abs_mean": 0.0448133684694767, "signal/format_reward/group_std_mean": 0.0826771542429924, "signal/format_reward/group_zero_std_frac": 0.6638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02240668423473835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02240668423473835, "signal/frontier_aurc_reward/centered_abs_mean": 0.001989086694084108, "signal/frontier_aurc_reward/group_std_mean": 0.0028138306923210623, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.560464974725619e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.560464974725619e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.3589896261692047, "signal/frontier_coverage_1/group_std_mean": 0.4434766948223114, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006425914168357849, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006425914168357849, "signal/frontier_coverage_10/centered_abs_mean": 0.3589896261692047, "signal/frontier_coverage_10/group_std_mean": 0.4434766948223114, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006425914168357849, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006425914168357849, "signal/frontier_coverage_15/centered_abs_mean": 0.3589896261692047, "signal/frontier_coverage_15/group_std_mean": 0.4434766948223114, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006425914168357849, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006425914168357849, "signal/frontier_coverage_20/centered_abs_mean": 0.3589896261692047, "signal/frontier_coverage_20/group_std_mean": 0.4434766948223114, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006425914168357849, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006425914168357849, "signal/frontier_coverage_25/centered_abs_mean": 0.3589896261692047, "signal/frontier_coverage_25/group_std_mean": 0.4434766948223114, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006425914168357849, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006425914168357849, "signal/frontier_coverage_5/centered_abs_mean": 0.3589896261692047, "signal/frontier_coverage_5/group_std_mean": 0.4434766948223114, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006425914168357849, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006425914168357849, "signal/frontier_ece_reward/centered_abs_mean": 0.03801303133368492, "signal/frontier_ece_reward/group_std_mean": 0.04951959177851677, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004751628916710615, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004751628916710615, "step": 30 }, { "calibration/aurc": 0.3133094829157582, "calibration/batch_distribution_entropy": 0.9464168853922367, "calibration/buffer_distribution_entropy": 0.9315091404755963, "calibration/confidence_entropy": 0.5289805554461637, "calibration/coverage@0%": 0.00853084623941131, "calibration/coverage@1%": 0.00853084623941131, "calibration/coverage@10%": 0.06184527374738721, "calibration/coverage@15%": 0.11876790247201814, "calibration/coverage@20%": 0.22484751066508574, "calibration/coverage@25%": 0.31518733588180836, "calibration/coverage@30%": 0.4264904932361337, "calibration/coverage@5%": 0.020896437637260772, "calibration/ece": 0.16186389460902642, "calibration/mean_confidence": 0.47679469896631954, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02578125, "completions/max_length": 3835.6, "completions/max_terminated_length": 3835.6, "completions/mean_length": 876.8658081054688, "completions/mean_terminated_length": 900.225390625, "completions/min_length": 0.0, "completions/min_terminated_length": 299.4, "epoch": 0.08399895001312484, "grad_norm": 0.0005008935695514083, "learning_rate": 4.166666666666667e-06, "loss": -0.0211, "num_tokens": 97646057.0, "reward": 0.9478654384613037, "reward_std": 0.19103043675422668, "rewards/accuracy_reward": 0.5026041567325592, "rewards/brier_reward": 0.7327090382575989, "rewards/confidence_uniqueness_reward": 0.8808719396591187, "rewards/format_reward": 0.9735243082046509, "rewards/frontier_aurc_reward": -0.003296623891219497, "rewards/frontier_coverage_1": 0.06155742183327675, "rewards/frontier_coverage_10": 0.06155742183327675, "rewards/frontier_coverage_15": 0.06155742183327675, "rewards/frontier_coverage_20": 0.06155742183327675, "rewards/frontier_coverage_25": 0.06155742183327675, "rewards/frontier_coverage_5": 0.06155742183327675, "rewards/frontier_ece_reward": 0.012410728633403778, "signal/accuracy_reward/centered_abs_mean": 0.2734483480453491, "signal/accuracy_reward/group_std_mean": 0.3328505277633667, "signal/accuracy_reward/group_zero_std_frac": 0.1583333358168602, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13672417402267456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.13672417402267456, "signal/advantage_abs_mean": 0.14573031067848205, "signal/advantage_pre_scale_abs_mean": 0.14573031067848205, "signal/advantage_pre_scale_std": 0.20383856296539307, "signal/advantage_std": 0.20383856296539307, "signal/brier_reward/centered_abs_mean": 0.2111766368150711, "signal/brier_reward/group_std_mean": 0.2590035915374756, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026397079601883887, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.026397079601883887, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08130226284265518, "signal/confidence_uniqueness_reward/group_std_mean": 0.11520479023456573, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010162782855331898, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010162782855331898, "signal/format_reward/centered_abs_mean": 0.04118381068110466, "signal/format_reward/group_std_mean": 0.07573343813419342, "signal/format_reward/group_zero_std_frac": 0.6944444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02059190534055233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02059190534055233, "signal/frontier_aurc_reward/centered_abs_mean": 0.002494776528328657, "signal/frontier_aurc_reward/group_std_mean": 0.0035075797699391843, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.465649835765362e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.465649835765362e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.28815833330154417, "signal/frontier_coverage_1/group_std_mean": 0.3588971734046936, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005158033780753612, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005158033780753612, "signal/frontier_coverage_10/centered_abs_mean": 0.28815833330154417, "signal/frontier_coverage_10/group_std_mean": 0.3588971734046936, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005158033780753612, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005158033780753612, "signal/frontier_coverage_15/centered_abs_mean": 0.28815833330154417, "signal/frontier_coverage_15/group_std_mean": 0.3588971734046936, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005158033780753612, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005158033780753612, "signal/frontier_coverage_20/centered_abs_mean": 0.28815833330154417, "signal/frontier_coverage_20/group_std_mean": 0.3588971734046936, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005158033780753612, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005158033780753612, "signal/frontier_coverage_25/centered_abs_mean": 0.28815833330154417, "signal/frontier_coverage_25/group_std_mean": 0.3588971734046936, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005158033780753612, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005158033780753612, "signal/frontier_coverage_5/centered_abs_mean": 0.28815833330154417, "signal/frontier_coverage_5/group_std_mean": 0.3588971734046936, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005158033780753612, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005158033780753612, "signal/frontier_ece_reward/centered_abs_mean": 0.032915469631552695, "signal/frontier_ece_reward/group_std_mean": 0.04370521605014801, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004114433703944087, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004114433703944087, "step": 35 }, { "calibration/aurc": 0.27142265360692236, "calibration/batch_distribution_entropy": 0.9276274667151082, "calibration/buffer_distribution_entropy": 0.9453424455999473, "calibration/confidence_entropy": 0.4904303911764786, "calibration/coverage@0%": 0.013755724139428155, "calibration/coverage@1%": 0.013755724139428155, "calibration/coverage@10%": 0.11803744967342229, "calibration/coverage@15%": 0.16277429177868546, "calibration/coverage@20%": 0.331587336757707, "calibration/coverage@25%": 0.44951244122495027, "calibration/coverage@30%": 0.5766922779499473, "calibration/coverage@5%": 0.04112414519205974, "calibration/ece": 0.10869480101542654, "calibration/mean_confidence": 0.6108967077387699, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021961805555555557, "completions/max_length": 3859.8, "completions/max_terminated_length": 3859.8, "completions/mean_length": 878.8442749023437, "completions/mean_terminated_length": 898.7201538085938, "completions/min_length": 0.0, "completions/min_terminated_length": 276.4, "epoch": 0.09599880001499982, "grad_norm": 0.0006976813892833889, "learning_rate": 4.761904761904762e-06, "loss": -0.0213, "num_tokens": 110889863.0, "reward": 0.9803658723831177, "reward_std": 0.19682038128376006, "rewards/accuracy_reward": 0.5561631917953491, "rewards/brier_reward": 0.749631917476654, "rewards/confidence_uniqueness_reward": 0.9038300752639771, "rewards/format_reward": 0.9776909708976745, "rewards/frontier_aurc_reward": -0.003466126276180148, "rewards/frontier_coverage_1": 0.04215884767472744, "rewards/frontier_coverage_10": 0.04215884767472744, "rewards/frontier_coverage_15": 0.04215884767472744, "rewards/frontier_coverage_20": 0.04215884767472744, "rewards/frontier_coverage_25": 0.04165246896445751, "rewards/frontier_coverage_5": 0.04215884767472744, "rewards/frontier_ece_reward": 0.018394294753670694, "signal/accuracy_reward/centered_abs_mean": 0.2570475250482559, "signal/accuracy_reward/group_std_mean": 0.3235936462879181, "signal/accuracy_reward/group_zero_std_frac": 0.1472222238779068, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12852376252412795, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.12852376252412795, "signal/advantage_abs_mean": 0.14671072959899903, "signal/advantage_pre_scale_abs_mean": 0.14671072959899903, "signal/advantage_pre_scale_std": 0.21450220942497253, "signal/advantage_std": 0.21450220942497253, "signal/brier_reward/centered_abs_mean": 0.19289257526397705, "signal/brier_reward/group_std_mean": 0.24276741445064545, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02411157190799713, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02411157190799713, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07457488030195236, "signal/confidence_uniqueness_reward/group_std_mean": 0.11184274405241013, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009321860037744045, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009321860037744045, "signal/format_reward/centered_abs_mean": 0.03830837681889534, "signal/format_reward/group_std_mean": 0.07467902153730392, "signal/format_reward/group_zero_std_frac": 0.6833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01915418840944767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01915418840944767, "signal/frontier_aurc_reward/centered_abs_mean": 0.003243405232205987, "signal/frontier_aurc_reward/group_std_mean": 0.004562646104022861, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.805695327580907e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.805695327580907e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1917881727218628, "signal/frontier_coverage_1/group_std_mean": 0.2579316467046738, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003433008212596178, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003433008212596178, "signal/frontier_coverage_10/centered_abs_mean": 0.1917881727218628, "signal/frontier_coverage_10/group_std_mean": 0.2579316467046738, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003433008212596178, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003433008212596178, "signal/frontier_coverage_15/centered_abs_mean": 0.1917881727218628, "signal/frontier_coverage_15/group_std_mean": 0.2579316467046738, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003433008212596178, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003433008212596178, "signal/frontier_coverage_20/centered_abs_mean": 0.1917881727218628, "signal/frontier_coverage_20/group_std_mean": 0.2579316467046738, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003433008212596178, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003433008212596178, "signal/frontier_coverage_25/centered_abs_mean": 0.19005673825740815, "signal/frontier_coverage_25/group_std_mean": 0.25573796927928927, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003402015473693609, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003402015473693609, "signal/frontier_coverage_5/centered_abs_mean": 0.1917881727218628, "signal/frontier_coverage_5/group_std_mean": 0.2579316467046738, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003433008212596178, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003433008212596178, "signal/frontier_ece_reward/centered_abs_mean": 0.03396204262971878, "signal/frontier_ece_reward/group_std_mean": 0.043666718155145647, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004245255328714848, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004245255328714848, "step": 40 }, { "calibration/aurc": 0.21111682886197997, "calibration/batch_distribution_entropy": 0.7720451181894042, "calibration/buffer_distribution_entropy": 0.9473069889769613, "calibration/confidence_entropy": 0.3829034585720266, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.14865921706595187, "calibration/coverage@15%": 0.20355953537722676, "calibration/coverage@20%": 0.4912034258569726, "calibration/coverage@25%": 0.7245557363623337, "calibration/coverage@30%": 0.8875162025482677, "calibration/coverage@5%": 0.0, "calibration/ece": 0.1270810302231555, "calibration/mean_confidence": 0.7599788657483719, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02786458333333335, "completions/max_length": 3859.8, "completions/max_terminated_length": 3859.8, "completions/mean_length": 915.6712768554687, "completions/mean_terminated_length": 942.2672607421875, "completions/min_length": 0.0, "completions/min_terminated_length": 314.0, "epoch": 0.1079986500168748, "grad_norm": 0.000620553910266608, "learning_rate": 4.909638554216868e-06, "loss": -0.0259, "num_tokens": 124573660.0, "reward": 0.9803022265434265, "reward_std": 0.20692598223686218, "rewards/accuracy_reward": 0.5699652791023254, "rewards/brier_reward": 0.7361868262290955, "rewards/confidence_uniqueness_reward": 0.8916867733001709, "rewards/format_reward": 0.9720486164093017, "rewards/frontier_aurc_reward": -0.003825964545831084, "rewards/frontier_coverage_1": 0.03256035540252924, "rewards/frontier_coverage_10": 0.03256035540252924, "rewards/frontier_coverage_15": 0.03256035540252924, "rewards/frontier_coverage_20": 0.03256035540252924, "rewards/frontier_coverage_25": 0.03246962446719408, "rewards/frontier_coverage_5": 0.03256035540252924, "rewards/frontier_ece_reward": 0.01907350979745388, "signal/accuracy_reward/centered_abs_mean": 0.24231770634651184, "signal/accuracy_reward/group_std_mean": 0.31229459047317504, "signal/accuracy_reward/group_zero_std_frac": 0.14166666716337203, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12115885317325592, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.12115885317325592, "signal/advantage_abs_mean": 0.15380933582782746, "signal/advantage_pre_scale_abs_mean": 0.15380933582782746, "signal/advantage_pre_scale_std": 0.2281140685081482, "signal/advantage_std": 0.2281140685081482, "signal/brier_reward/centered_abs_mean": 0.1986512392759323, "signal/brier_reward/group_std_mean": 0.2531787097454071, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02483140490949154, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02483140490949154, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08986316919326783, "signal/confidence_uniqueness_reward/group_std_mean": 0.1289219468832016, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011232896149158478, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011232896149158478, "signal/format_reward/centered_abs_mean": 0.04635416753590107, "signal/format_reward/group_std_mean": 0.08056993260979653, "signal/format_reward/group_zero_std_frac": 0.694444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.023177083767950534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.023177083767950534, "signal/frontier_aurc_reward/centered_abs_mean": 0.003908289410173893, "signal/frontier_aurc_reward/group_std_mean": 0.005522150546312332, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.995837611611933e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.995837611611933e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1319861814379692, "signal/frontier_coverage_1/group_std_mean": 0.18985169529914855, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023625525878742336, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023625525878742336, "signal/frontier_coverage_10/centered_abs_mean": 0.1319861814379692, "signal/frontier_coverage_10/group_std_mean": 0.18985169529914855, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023625525878742336, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023625525878742336, "signal/frontier_coverage_15/centered_abs_mean": 0.1319861814379692, "signal/frontier_coverage_15/group_std_mean": 0.18985169529914855, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023625525878742336, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023625525878742336, "signal/frontier_coverage_20/centered_abs_mean": 0.1319861814379692, "signal/frontier_coverage_20/group_std_mean": 0.18985169529914855, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023625525878742336, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023625525878742336, "signal/frontier_coverage_25/centered_abs_mean": 0.12659270018339158, "signal/frontier_coverage_25/group_std_mean": 0.1825567066669464, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022660091053694487, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022660091053694487, "signal/frontier_coverage_5/centered_abs_mean": 0.1319861814379692, "signal/frontier_coverage_5/group_std_mean": 0.18985169529914855, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023625525878742336, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023625525878742336, "signal/frontier_ece_reward/centered_abs_mean": 0.033340536430478095, "signal/frontier_ece_reward/group_std_mean": 0.04229341298341751, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004167567053809762, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004167567053809762, "step": 45 }, { "calibration/aurc": 0.3430718095214229, "calibration/batch_distribution_entropy": 0.8041025516564181, "calibration/buffer_distribution_entropy": 0.9401868214478146, "calibration/confidence_entropy": 0.38931542096917787, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.014323607427055704, "calibration/coverage@15%": 0.040318302387267906, "calibration/coverage@20%": 0.15724626742288644, "calibration/coverage@25%": 0.25119180206947106, "calibration/coverage@30%": 0.386912761150543, "calibration/coverage@5%": 0.0, "calibration/ece": 0.22271492216092695, "calibration/mean_confidence": 0.729308284858438, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024739583333333325, "completions/max_length": 3747.8, "completions/max_terminated_length": 3747.8, "completions/mean_length": 950.247998046875, "completions/mean_terminated_length": 974.4024658203125, "completions/min_length": 0.0, "completions/min_terminated_length": 289.4, "epoch": 0.11999850001874976, "grad_norm": 0.0004917439073324203, "learning_rate": 4.759036144578314e-06, "loss": -0.0236, "num_tokens": 138618117.0, "reward": 0.9939500212669372, "reward_std": 0.1924179255962372, "rewards/accuracy_reward": 0.5958333373069763, "rewards/brier_reward": 0.7404291272163391, "rewards/confidence_uniqueness_reward": 0.8836002588272095, "rewards/format_reward": 0.9748263835906983, "rewards/frontier_aurc_reward": -0.0037740686908364295, "rewards/frontier_coverage_1": 0.0278642563149333, "rewards/frontier_coverage_10": 0.0278642563149333, "rewards/frontier_coverage_15": 0.0278642563149333, "rewards/frontier_coverage_20": 0.0278642563149333, "rewards/frontier_coverage_25": 0.025865022838115693, "rewards/frontier_coverage_5": 0.0278642563149333, "rewards/frontier_ece_reward": 0.02181735634803772, "signal/accuracy_reward/centered_abs_mean": 0.22273220419883727, "signal/accuracy_reward/group_std_mean": 0.28220229744911196, "signal/accuracy_reward/group_zero_std_frac": 0.2388888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11136610209941863, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11136610209941863, "signal/advantage_abs_mean": 0.14409471452236175, "signal/advantage_pre_scale_abs_mean": 0.14409471452236175, "signal/advantage_pre_scale_std": 0.21969010531902314, "signal/advantage_std": 0.21969010531902314, "signal/brier_reward/centered_abs_mean": 0.19170068800449372, "signal/brier_reward/group_std_mean": 0.24460790455341339, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023962586000561715, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.023962586000561715, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09260407537221908, "signal/confidence_uniqueness_reward/group_std_mean": 0.12991546094417572, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011575509421527385, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011575509421527385, "signal/format_reward/centered_abs_mean": 0.04220920167863369, "signal/format_reward/group_std_mean": 0.07533831149339676, "signal/format_reward/group_zero_std_frac": 0.7055555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.021104600839316846, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.021104600839316846, "signal/frontier_aurc_reward/centered_abs_mean": 0.003983176313340664, "signal/frontier_aurc_reward/group_std_mean": 0.00584728941321373, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.129885198082775e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.129885198082775e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12392588853836059, "signal/frontier_coverage_1/group_std_mean": 0.18006423413753508, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022182733286172152, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022182733286172152, "signal/frontier_coverage_10/centered_abs_mean": 0.12392588853836059, "signal/frontier_coverage_10/group_std_mean": 0.18006423413753508, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022182733286172152, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022182733286172152, "signal/frontier_coverage_15/centered_abs_mean": 0.12392588853836059, "signal/frontier_coverage_15/group_std_mean": 0.18006423413753508, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022182733286172152, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022182733286172152, "signal/frontier_coverage_20/centered_abs_mean": 0.12392588853836059, "signal/frontier_coverage_20/group_std_mean": 0.18006423413753508, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022182733286172152, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022182733286172152, "signal/frontier_coverage_25/centered_abs_mean": 0.11814317256212234, "signal/frontier_coverage_25/group_std_mean": 0.17232573330402373, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002114762645214796, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002114762645214796, "signal/frontier_coverage_5/centered_abs_mean": 0.12392588853836059, "signal/frontier_coverage_5/group_std_mean": 0.18006423413753508, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022182733286172152, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022182733286172152, "signal/frontier_ece_reward/centered_abs_mean": 0.029776628687977792, "signal/frontier_ece_reward/group_std_mean": 0.037955837696790694, "signal/frontier_ece_reward/group_zero_std_frac": 0.00555555559694767, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003722078585997224, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003722078585997224, "step": 50 }, { "epoch": 0.11999850001874976, "eval_calibration/aurc": 0.1801128277259354, "eval_calibration/batch_distribution_entropy": 0.7401758100779751, "eval_calibration/buffer_distribution_entropy": 0.9360676626033105, "eval_calibration/confidence_entropy": 0.3804461416023805, "eval_calibration/coverage@0%": 0.1562021072796935, "eval_calibration/coverage@1%": 0.1562021072796935, "eval_calibration/coverage@10%": 0.4049449233716475, "eval_calibration/coverage@15%": 0.5112667624521073, "eval_calibration/coverage@20%": 0.6722222222222222, "eval_calibration/coverage@25%": 0.7611111111111111, "eval_calibration/coverage@30%": 0.8875000000000001, "eval_calibration/coverage@5%": 0.1562021072796935, "eval_calibration/ece": 0.190010410979762, "eval_calibration/mean_confidence": 0.738599688144678, "eval_completions/clipped_ratio": 0.020833333333333332, "eval_completions/max_length": 2587.3333333333335, "eval_completions/max_terminated_length": 2587.3333333333335, "eval_completions/mean_length": 941.2585144042969, "eval_completions/mean_terminated_length": 961.3792317708334, "eval_completions/min_length": 73.33333333333333, "eval_completions/min_terminated_length": 390.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 138618117.0, "eval_reward": 1.0041676660378773, "eval_reward_std": 0.29189151525497437, "eval_rewards/accuracy_reward": 0.621527781089147, "eval_rewards/brier_reward": 0.7572712401549021, "eval_rewards/confidence_uniqueness_reward": 0.8511187533537546, "eval_rewards/format_reward": 0.9748263955116272, "eval_rewards/frontier_aurc_reward": -0.0029798958372945585, "eval_rewards/frontier_coverage_1": 0.021538497608465452, "eval_rewards/frontier_coverage_10": 0.021538497608465452, "eval_rewards/frontier_coverage_15": 0.021538497608465452, "eval_rewards/frontier_coverage_20": 0.021538497608465452, "eval_rewards/frontier_coverage_25": 0.021917358913924545, "eval_rewards/frontier_coverage_5": 0.021538497608465452, "eval_rewards/frontier_ece_reward": 0.021401030011475086, "eval_runtime": 215.5104, "eval_samples_per_second": 4.64, "eval_signal/accuracy_reward/centered_abs_mean": 0.4531249950329463, "eval_signal/accuracy_reward/group_std_mean": 0.48286275565624237, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22656249751647314, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22656249751647314, "eval_signal/advantage_abs_mean": 0.25273098051548004, "eval_signal/advantage_pre_scale_abs_mean": 0.25273098051548004, "eval_signal/advantage_pre_scale_std": 0.2910451292991638, "eval_signal/advantage_std": 0.2910451292991638, "eval_signal/brier_reward/centered_abs_mean": 0.24945268283287683, "eval_signal/brier_reward/group_std_mean": 0.3116542746623357, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031181585354109604, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.031181585354109604, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.09014085307717323, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.13949279735485712, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011267606634646654, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011267606634646654, "eval_signal/format_reward/centered_abs_mean": 0.04736328109477957, "eval_signal/format_reward/group_std_mean": 0.11093035619705915, "eval_signal/format_reward/group_zero_std_frac": 0.4722222362955411, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.023681640547389787, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.023681640547389787, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004799036852394541, "eval_signal/frontier_aurc_reward/group_std_mean": 0.007366780269270142, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.590275562407139e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.590275562407139e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.161320169766744, "eval_signal/frontier_coverage_1/group_std_mean": 0.25421082725127536, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028876310292010507, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028876310292010507, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.161320169766744, "eval_signal/frontier_coverage_10/group_std_mean": 0.25421082725127536, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028876310292010507, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028876310292010507, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.161320169766744, "eval_signal/frontier_coverage_15/group_std_mean": 0.25421082725127536, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028876310292010507, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028876310292010507, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.161320169766744, "eval_signal/frontier_coverage_20/group_std_mean": 0.25421082725127536, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028876310292010507, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028876310292010507, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.15456343442201614, "eval_signal/frontier_coverage_25/group_std_mean": 0.24467646330595016, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027666852499047914, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027666852499047914, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.161320169766744, "eval_signal/frontier_coverage_5/group_std_mean": 0.25421082725127536, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028876310292010507, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028876310292010507, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.04011191427707672, "eval_signal/frontier_ece_reward/group_std_mean": 0.047835673515995346, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00501398928463459, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00501398928463459, "eval_steps_per_second": 0.028, "step": 50 }, { "calibration/aurc": 0.23110574984938426, "calibration/batch_distribution_entropy": 0.8089199000138481, "calibration/buffer_distribution_entropy": 0.933102581412159, "calibration/confidence_entropy": 0.38733970684542757, "calibration/coverage@0%": 0.005305039787798409, "calibration/coverage@1%": 0.005305039787798409, "calibration/coverage@10%": 0.138139306638724, "calibration/coverage@15%": 0.298316360622093, "calibration/coverage@20%": 0.45166650464538166, "calibration/coverage@25%": 0.5443626720704854, "calibration/coverage@30%": 0.7179725549565843, "calibration/coverage@5%": 0.019666741915457985, "calibration/ece": 0.1603883890114183, "calibration/mean_confidence": 0.7279595248111649, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02378472222222221, "completions/max_length": 3765.0, "completions/max_terminated_length": 3765.0, "completions/mean_length": 989.8234497070313, "completions/mean_terminated_length": 1014.1861572265625, "completions/min_length": 0.0, "completions/min_terminated_length": 282.0, "epoch": 0.13199835002062474, "grad_norm": 0.0004283857124391943, "learning_rate": 4.60843373493976e-06, "loss": -0.0216, "num_tokens": 153101459.0, "reward": 1.0128529906272887, "reward_std": 0.17540223300457, "rewards/accuracy_reward": 0.6284722328186035, "rewards/brier_reward": 0.7503431439399719, "rewards/confidence_uniqueness_reward": 0.9019472122192382, "rewards/format_reward": 0.9759548664093017, "rewards/frontier_aurc_reward": -0.0030153077095746994, "rewards/frontier_coverage_1": 0.015781766315922142, "rewards/frontier_coverage_10": 0.015781766315922142, "rewards/frontier_coverage_15": 0.015781766315922142, "rewards/frontier_coverage_20": 0.015781766315922142, "rewards/frontier_coverage_25": 0.015925674338359386, "rewards/frontier_coverage_5": 0.015781766315922142, "rewards/frontier_ece_reward": 0.01967682149261236, "signal/accuracy_reward/centered_abs_mean": 0.1988498240709305, "signal/accuracy_reward/group_std_mean": 0.2617811858654022, "signal/accuracy_reward/group_zero_std_frac": 0.25833333730697633, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09942491203546525, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09942491203546525, "signal/advantage_abs_mean": 0.1299208104610443, "signal/advantage_pre_scale_abs_mean": 0.1299208104610443, "signal/advantage_pre_scale_std": 0.20433064699172973, "signal/advantage_std": 0.20433064699172973, "signal/brier_reward/centered_abs_mean": 0.18966372907161713, "signal/brier_reward/group_std_mean": 0.23997350335121154, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02370796613395214, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02370796613395214, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07613965570926666, "signal/confidence_uniqueness_reward/group_std_mean": 0.1076380655169487, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009517456963658332, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009517456963658332, "signal/format_reward/centered_abs_mean": 0.0372667096555233, "signal/format_reward/group_std_mean": 0.0631372444331646, "signal/format_reward/group_zero_std_frac": 0.7583333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01863335482776165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01863335482776165, "signal/frontier_aurc_reward/centered_abs_mean": 0.0030304885003715754, "signal/frontier_aurc_reward/group_std_mean": 0.004345366265624762, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.424574264907278e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.424574264907278e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14639625549316407, "signal/frontier_coverage_1/group_std_mean": 0.2078000247478485, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002620492875576019, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002620492875576019, "signal/frontier_coverage_10/centered_abs_mean": 0.14639625549316407, "signal/frontier_coverage_10/group_std_mean": 0.2078000247478485, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002620492875576019, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002620492875576019, "signal/frontier_coverage_15/centered_abs_mean": 0.14639625549316407, "signal/frontier_coverage_15/group_std_mean": 0.2078000247478485, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002620492875576019, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002620492875576019, "signal/frontier_coverage_20/centered_abs_mean": 0.14639625549316407, "signal/frontier_coverage_20/group_std_mean": 0.2078000247478485, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002620492875576019, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002620492875576019, "signal/frontier_coverage_25/centered_abs_mean": 0.13936484456062317, "signal/frontier_coverage_25/group_std_mean": 0.19845299422740936, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002494630683213472, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002494630683213472, "signal/frontier_coverage_5/centered_abs_mean": 0.14639625549316407, "signal/frontier_coverage_5/group_std_mean": 0.2078000247478485, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002620492875576019, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002620492875576019, "signal/frontier_ece_reward/centered_abs_mean": 0.028044429421424866, "signal/frontier_ece_reward/group_std_mean": 0.03536311313509941, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035055536776781083, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035055536776781083, "step": 55 }, { "calibration/aurc": 0.27031141258981295, "calibration/batch_distribution_entropy": 0.925694518685917, "calibration/buffer_distribution_entropy": 0.930078633270959, "calibration/confidence_entropy": 0.4497176248945136, "calibration/coverage@0%": 0.021467342760165452, "calibration/coverage@1%": 0.021467342760165452, "calibration/coverage@10%": 0.2214269049112599, "calibration/coverage@15%": 0.3252333291365323, "calibration/coverage@20%": 0.4505228062708378, "calibration/coverage@25%": 0.5076995105341562, "calibration/coverage@30%": 0.6556430446194226, "calibration/coverage@5%": 0.07958252600623875, "calibration/ece": 0.16927449984691845, "calibration/mean_confidence": 0.6089067489743154, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023697916666666673, "completions/max_length": 3999.4, "completions/max_terminated_length": 3999.4, "completions/mean_length": 1033.7720581054687, "completions/mean_terminated_length": 1058.8163940429688, "completions/min_length": 0.0, "completions/min_terminated_length": 306.0, "epoch": 0.14399820002249972, "grad_norm": 0.00038596263038925827, "learning_rate": 4.457831325301205e-06, "loss": -0.0209, "num_tokens": 168107089.0, "reward": 1.0082432866096496, "reward_std": 0.17611123621463776, "rewards/accuracy_reward": 0.6102430701255799, "rewards/brier_reward": 0.7585122108459472, "rewards/confidence_uniqueness_reward": 0.921110475063324, "rewards/format_reward": 0.9755208373069764, "rewards/frontier_aurc_reward": -0.00250103990547359, "rewards/frontier_coverage_1": 0.030346688139252363, "rewards/frontier_coverage_10": 0.030346688139252363, "rewards/frontier_coverage_15": 0.030346688139252363, "rewards/frontier_coverage_20": 0.030346688139252363, "rewards/frontier_coverage_25": 0.03001237902790308, "rewards/frontier_coverage_5": 0.030346688139252363, "rewards/frontier_ece_reward": 0.017600218765437603, "signal/accuracy_reward/centered_abs_mean": 0.2137261301279068, "signal/accuracy_reward/group_std_mean": 0.26997236013412473, "signal/accuracy_reward/group_zero_std_frac": 0.28611111342906953, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1068630650639534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1068630650639534, "signal/advantage_abs_mean": 0.13051027357578276, "signal/advantage_pre_scale_abs_mean": 0.13051027357578276, "signal/advantage_pre_scale_std": 0.20581234395503997, "signal/advantage_std": 0.20581234395503997, "signal/brier_reward/centered_abs_mean": 0.1883644551038742, "signal/brier_reward/group_std_mean": 0.23838137984275817, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023545556887984275, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.023545556887984275, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06121076717972755, "signal/confidence_uniqueness_reward/group_std_mean": 0.09759739488363266, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007651345897465944, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007651345897465944, "signal/format_reward/centered_abs_mean": 0.03964843787252903, "signal/format_reward/group_std_mean": 0.07346592992544174, "signal/format_reward/group_zero_std_frac": 0.7027777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.019824218936264515, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.019824218936264515, "signal/frontier_aurc_reward/centered_abs_mean": 0.002220911718904972, "signal/frontier_aurc_reward/group_std_mean": 0.003224900644272566, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9754316821927206e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9754316821927206e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18549104630947114, "signal/frontier_coverage_1/group_std_mean": 0.25374809205532073, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003320289496332407, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003320289496332407, "signal/frontier_coverage_10/centered_abs_mean": 0.18549104630947114, "signal/frontier_coverage_10/group_std_mean": 0.25374809205532073, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003320289496332407, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003320289496332407, "signal/frontier_coverage_15/centered_abs_mean": 0.18549104630947114, "signal/frontier_coverage_15/group_std_mean": 0.25374809205532073, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003320289496332407, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003320289496332407, "signal/frontier_coverage_20/centered_abs_mean": 0.18549104630947114, "signal/frontier_coverage_20/group_std_mean": 0.25374809205532073, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003320289496332407, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003320289496332407, "signal/frontier_coverage_25/centered_abs_mean": 0.17355382144451142, "signal/frontier_coverage_25/group_std_mean": 0.2386282503604889, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003106613457202911, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003106613457202911, "signal/frontier_coverage_5/centered_abs_mean": 0.18549104630947114, "signal/frontier_coverage_5/group_std_mean": 0.25374809205532073, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003320289496332407, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003320289496332407, "signal/frontier_ece_reward/centered_abs_mean": 0.02598983086645603, "signal/frontier_ece_reward/group_std_mean": 0.03347852304577827, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003248728858307004, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003248728858307004, "step": 60 }, { "calibration/aurc": 0.24556579726074385, "calibration/batch_distribution_entropy": 0.9026302785834652, "calibration/buffer_distribution_entropy": 0.9318042377016408, "calibration/confidence_entropy": 0.4486623650823532, "calibration/coverage@0%": 0.00793576986065844, "calibration/coverage@1%": 0.00793576986065844, "calibration/coverage@10%": 0.1194504595567045, "calibration/coverage@15%": 0.3187219480359977, "calibration/coverage@20%": 0.5244310933832791, "calibration/coverage@25%": 0.6673086716262446, "calibration/coverage@30%": 0.736127819466827, "calibration/coverage@5%": 0.018434457524700434, "calibration/ece": 0.1280835390664104, "calibration/mean_confidence": 0.6550812971775258, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021006944444444443, "completions/max_length": 3758.4, "completions/max_terminated_length": 3758.4, "completions/mean_length": 1050.6894287109376, "completions/mean_terminated_length": 1073.2746337890626, "completions/min_length": 0.0, "completions/min_terminated_length": 247.6, "epoch": 0.1559980500243747, "grad_norm": 0.0004071203584317118, "learning_rate": 4.307228915662651e-06, "loss": -0.0193, "num_tokens": 183305079.0, "reward": 1.0295760035514832, "reward_std": 0.1585527241230011, "rewards/accuracy_reward": 0.6458333253860473, "rewards/brier_reward": 0.7787026405334473, "rewards/confidence_uniqueness_reward": 0.922456705570221, "rewards/format_reward": 0.9786458373069763, "rewards/frontier_aurc_reward": -0.0021291735116392373, "rewards/frontier_coverage_1": 0.022982970625162125, "rewards/frontier_coverage_10": 0.022982970625162125, "rewards/frontier_coverage_15": 0.022982970625162125, "rewards/frontier_coverage_20": 0.022982970625162125, "rewards/frontier_coverage_25": 0.022807615250349043, "rewards/frontier_coverage_5": 0.022982970625162125, "rewards/frontier_ece_reward": 0.018114662915468215, "signal/accuracy_reward/centered_abs_mean": 0.1768988698720932, "signal/accuracy_reward/group_std_mean": 0.2400292694568634, "signal/accuracy_reward/group_zero_std_frac": 0.30277777910232545, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0884494349360466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0884494349360466, "signal/advantage_abs_mean": 0.1123097226023674, "signal/advantage_pre_scale_abs_mean": 0.1123097226023674, "signal/advantage_pre_scale_std": 0.18798567056655885, "signal/advantage_std": 0.18798567056655885, "signal/brier_reward/centered_abs_mean": 0.1735439658164978, "signal/brier_reward/group_std_mean": 0.22210538983345032, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021692995727062226, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021692995727062226, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.058646786212921145, "signal/confidence_uniqueness_reward/group_std_mean": 0.09276971966028214, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007330848276615143, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007330848276615143, "signal/format_reward/centered_abs_mean": 0.03575303815305233, "signal/format_reward/group_std_mean": 0.06657437458634377, "signal/format_reward/group_zero_std_frac": 0.7277777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.017876519076526164, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017876519076526164, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018694063648581505, "signal/frontier_aurc_reward/group_std_mean": 0.0027225222904235123, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.346237317600753e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.346237317600753e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16750572323799134, "signal/frontier_coverage_1/group_std_mean": 0.232903328537941, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002998352330178022, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002998352330178022, "signal/frontier_coverage_10/centered_abs_mean": 0.16750572323799134, "signal/frontier_coverage_10/group_std_mean": 0.232903328537941, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002998352330178022, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002998352330178022, "signal/frontier_coverage_15/centered_abs_mean": 0.16750572323799134, "signal/frontier_coverage_15/group_std_mean": 0.232903328537941, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002998352330178022, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002998352330178022, "signal/frontier_coverage_20/centered_abs_mean": 0.16750572323799134, "signal/frontier_coverage_20/group_std_mean": 0.232903328537941, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002998352330178022, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002998352330178022, "signal/frontier_coverage_25/centered_abs_mean": 0.1528725266456604, "signal/frontier_coverage_25/group_std_mean": 0.21361254751682282, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00273641818203032, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00273641818203032, "signal/frontier_coverage_5/centered_abs_mean": 0.16750572323799134, "signal/frontier_coverage_5/group_std_mean": 0.232903328537941, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002998352330178022, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002998352330178022, "signal/frontier_ece_reward/centered_abs_mean": 0.02373338267207146, "signal/frontier_ece_reward/group_std_mean": 0.03065846674144268, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0029666728340089323, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0029666728340089323, "step": 65 }, { "calibration/aurc": 0.25933503364998944, "calibration/batch_distribution_entropy": 0.8800218583456665, "calibration/buffer_distribution_entropy": 0.9317119694851437, "calibration/confidence_entropy": 0.4205811264235626, "calibration/coverage@0%": 0.003237462060991473, "calibration/coverage@1%": 0.003237462060991473, "calibration/coverage@10%": 0.14655297008238183, "calibration/coverage@15%": 0.216372308137014, "calibration/coverage@20%": 0.38418761415631975, "calibration/coverage@25%": 0.5244719724579049, "calibration/coverage@30%": 0.6908412398030606, "calibration/coverage@5%": 0.037462060991472756, "calibration/ece": 0.1322729398015512, "calibration/mean_confidence": 0.6758229255336713, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02743055555555556, "completions/max_length": 3929.0, "completions/max_terminated_length": 3929.0, "completions/mean_length": 1083.6014892578125, "completions/mean_terminated_length": 1114.269970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 245.0, "epoch": 0.16799790002624967, "grad_norm": 0.0003614244342315942, "learning_rate": 4.156626506024097e-06, "loss": -0.0247, "num_tokens": 198866312.0, "reward": 1.0165080308914185, "reward_std": 0.16381416022777556, "rewards/accuracy_reward": 0.6317708253860473, "rewards/brier_reward": 0.7659277558326721, "rewards/confidence_uniqueness_reward": 0.9152140974998474, "rewards/format_reward": 0.971875011920929, "rewards/frontier_aurc_reward": -0.0022951006889343263, "rewards/frontier_coverage_1": 0.023939225263893603, "rewards/frontier_coverage_10": 0.023939225263893603, "rewards/frontier_coverage_15": 0.023939225263893603, "rewards/frontier_coverage_20": 0.023939225263893603, "rewards/frontier_coverage_25": 0.023304045526310803, "rewards/frontier_coverage_5": 0.023939225263893603, "rewards/frontier_ece_reward": 0.016189970448613165, "signal/accuracy_reward/centered_abs_mean": 0.17754991054534913, "signal/accuracy_reward/group_std_mean": 0.23475689589977264, "signal/accuracy_reward/group_zero_std_frac": 0.3277777820825577, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08877495527267457, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08877495527267457, "signal/advantage_abs_mean": 0.11769283264875412, "signal/advantage_pre_scale_abs_mean": 0.11769283264875412, "signal/advantage_pre_scale_std": 0.1989002138376236, "signal/advantage_std": 0.1989002138376236, "signal/brier_reward/centered_abs_mean": 0.17325561940670015, "signal/brier_reward/group_std_mean": 0.22210197150707245, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021656952425837518, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021656952425837518, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06669195368885994, "signal/confidence_uniqueness_reward/group_std_mean": 0.1016717791557312, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008336494211107492, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008336494211107492, "signal/format_reward/centered_abs_mean": 0.04429253414273262, "signal/format_reward/group_std_mean": 0.074928018450737, "signal/format_reward/group_zero_std_frac": 0.7222222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02214626707136631, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02214626707136631, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019255728693678975, "signal/frontier_aurc_reward/group_std_mean": 0.0028066968079656363, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4467753357603216e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4467753357603216e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15990498065948486, "signal/frontier_coverage_1/group_std_mean": 0.22118420898914337, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028622991405427454, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028622991405427454, "signal/frontier_coverage_10/centered_abs_mean": 0.15990498065948486, "signal/frontier_coverage_10/group_std_mean": 0.22118420898914337, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028622991405427454, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028622991405427454, "signal/frontier_coverage_15/centered_abs_mean": 0.15990498065948486, "signal/frontier_coverage_15/group_std_mean": 0.22118420898914337, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028622991405427454, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028622991405427454, "signal/frontier_coverage_20/centered_abs_mean": 0.15990498065948486, "signal/frontier_coverage_20/group_std_mean": 0.22118420898914337, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028622991405427454, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028622991405427454, "signal/frontier_coverage_25/centered_abs_mean": 0.13494354784488677, "signal/frontier_coverage_25/group_std_mean": 0.18864382803440094, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002415489498525858, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002415489498525858, "signal/frontier_coverage_5/centered_abs_mean": 0.15990498065948486, "signal/frontier_coverage_5/group_std_mean": 0.22118420898914337, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028622991405427454, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028622991405427454, "signal/frontier_ece_reward/centered_abs_mean": 0.022081541270017623, "signal/frontier_ece_reward/group_std_mean": 0.02841006629168987, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002760192658752203, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002760192658752203, "step": 70 }, { "calibration/aurc": 0.21584225769988402, "calibration/batch_distribution_entropy": 0.8232698717769189, "calibration/buffer_distribution_entropy": 0.9289513937019261, "calibration/confidence_entropy": 0.40924202055118303, "calibration/coverage@0%": 0.02377966824023079, "calibration/coverage@1%": 0.02377966824023079, "calibration/coverage@10%": 0.27830202596380804, "calibration/coverage@15%": 0.3686221479150275, "calibration/coverage@20%": 0.4240375579159018, "calibration/coverage@25%": 0.5429435483870968, "calibration/coverage@30%": 0.7251450198075834, "calibration/coverage@5%": 0.146294065259201, "calibration/ece": 0.133336138135841, "calibration/mean_confidence": 0.7307139257891493, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01875, "completions/max_length": 3962.8, "completions/max_terminated_length": 3962.8, "completions/mean_length": 1109.963720703125, "completions/mean_terminated_length": 1131.3071044921876, "completions/min_length": 0.0, "completions/min_terminated_length": 300.6, "epoch": 0.17999775002812465, "grad_norm": 0.000340988248353824, "learning_rate": 4.006024096385543e-06, "loss": -0.0156, "num_tokens": 214717990.0, "reward": 1.048438024520874, "reward_std": 0.1558634340763092, "rewards/accuracy_reward": 0.6855034828186035, "rewards/brier_reward": 0.784266984462738, "rewards/confidence_uniqueness_reward": 0.9156635403633118, "rewards/format_reward": 0.9809895753860474, "rewards/frontier_aurc_reward": -0.002111028810031712, "rewards/frontier_coverage_1": 0.005601268447935581, "rewards/frontier_coverage_10": 0.005601268447935581, "rewards/frontier_coverage_15": 0.005601268447935581, "rewards/frontier_coverage_20": 0.005601268447935581, "rewards/frontier_coverage_25": 0.010380196291953326, "rewards/frontier_coverage_5": 0.005601268447935581, "rewards/frontier_ece_reward": 0.01640697121620178, "signal/accuracy_reward/centered_abs_mean": 0.18347981870174407, "signal/accuracy_reward/group_std_mean": 0.2395369827747345, "signal/accuracy_reward/group_zero_std_frac": 0.3277777791023254, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09173990935087203, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09173990935087203, "signal/advantage_abs_mean": 0.11403761059045792, "signal/advantage_pre_scale_abs_mean": 0.11403761059045792, "signal/advantage_pre_scale_std": 0.19213829636573793, "signal/advantage_std": 0.19213829636573793, "signal/brier_reward/centered_abs_mean": 0.1615518569946289, "signal/brier_reward/group_std_mean": 0.2098041832447052, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020193982124328613, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020193982124328613, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06113546639680863, "signal/confidence_uniqueness_reward/group_std_mean": 0.09073543101549149, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007641933299601078, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007641933299601078, "signal/format_reward/centered_abs_mean": 0.03050672747194767, "signal/format_reward/group_std_mean": 0.054883723706007005, "signal/format_reward/group_zero_std_frac": 0.7805555582046508, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.015253363735973835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.015253363735973835, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018784363754093647, "signal/frontier_aurc_reward/group_std_mean": 0.002798874117434025, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.362400966580026e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.362400966580026e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13647643923759462, "signal/frontier_coverage_1/group_std_mean": 0.19730258584022523, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002442928357049823, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002442928357049823, "signal/frontier_coverage_10/centered_abs_mean": 0.13647643923759462, "signal/frontier_coverage_10/group_std_mean": 0.19730258584022523, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002442928357049823, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002442928357049823, "signal/frontier_coverage_15/centered_abs_mean": 0.13647643923759462, "signal/frontier_coverage_15/group_std_mean": 0.19730258584022523, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002442928357049823, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002442928357049823, "signal/frontier_coverage_20/centered_abs_mean": 0.13647643923759462, "signal/frontier_coverage_20/group_std_mean": 0.19730258584022523, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002442928357049823, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002442928357049823, "signal/frontier_coverage_25/centered_abs_mean": 0.10959683507680892, "signal/frontier_coverage_25/group_std_mean": 0.16029545962810515, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019617833429947497, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019617833429947497, "signal/frontier_coverage_5/centered_abs_mean": 0.13647643923759462, "signal/frontier_coverage_5/group_std_mean": 0.19730258584022523, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002442928357049823, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002442928357049823, "signal/frontier_ece_reward/centered_abs_mean": 0.019810602813959122, "signal/frontier_ece_reward/group_std_mean": 0.025616522505879404, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0024763253517448903, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0024763253517448903, "step": 75 }, { "calibration/aurc": 0.21687736600561439, "calibration/batch_distribution_entropy": 0.7663935195687704, "calibration/buffer_distribution_entropy": 0.9254838064808076, "calibration/confidence_entropy": 0.4093730514531302, "calibration/coverage@0%": 0.014151451943476879, "calibration/coverage@1%": 0.014151451943476879, "calibration/coverage@10%": 0.019910614247141797, "calibration/coverage@15%": 0.35223903051059835, "calibration/coverage@20%": 0.547384604330937, "calibration/coverage@25%": 0.7177102314561801, "calibration/coverage@30%": 0.8540723393182409, "calibration/coverage@5%": 0.019910614247141797, "calibration/ece": 0.154557627381037, "calibration/mean_confidence": 0.7600029543594301, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018663194444444465, "completions/max_length": 3866.6, "completions/max_terminated_length": 3866.6, "completions/mean_length": 1140.516748046875, "completions/mean_terminated_length": 1162.3487548828125, "completions/min_length": 0.0, "completions/min_terminated_length": 255.4, "epoch": 0.19199760002999963, "grad_norm": 0.0004383666382636875, "learning_rate": 3.855421686746989e-06, "loss": -0.0172, "num_tokens": 230910023.0, "reward": 1.0291898369789123, "reward_std": 0.15721507370471954, "rewards/accuracy_reward": 0.6474826335906982, "rewards/brier_reward": 0.7720065116882324, "rewards/confidence_uniqueness_reward": 0.9144485116004943, "rewards/format_reward": 0.9813368082046509, "rewards/frontier_aurc_reward": -0.0023890127893537285, "rewards/frontier_coverage_1": 0.021373348124325276, "rewards/frontier_coverage_10": 0.021373348124325276, "rewards/frontier_coverage_15": 0.021373348124325276, "rewards/frontier_coverage_20": 0.021373348124325276, "rewards/frontier_coverage_25": 0.02539810836315155, "rewards/frontier_coverage_5": 0.021373348124325276, "rewards/frontier_ece_reward": 0.013187539111822844, "signal/accuracy_reward/centered_abs_mean": 0.18199326992034912, "signal/accuracy_reward/group_std_mean": 0.24213020503520966, "signal/accuracy_reward/group_zero_std_frac": 0.30833333134651186, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09099663496017456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09099663496017456, "signal/advantage_abs_mean": 0.11401565670967102, "signal/advantage_pre_scale_abs_mean": 0.11401565670967102, "signal/advantage_pre_scale_std": 0.18852558135986328, "signal/advantage_std": 0.18852558135986328, "signal/brier_reward/centered_abs_mean": 0.16122573614120483, "signal/brier_reward/group_std_mean": 0.20942769348621368, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020153217017650604, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020153217017650604, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.061580770462751386, "signal/confidence_uniqueness_reward/group_std_mean": 0.09112796634435653, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007697596307843923, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007697596307843923, "signal/format_reward/centered_abs_mean": 0.02798936665058136, "signal/format_reward/group_std_mean": 0.05254996344447136, "signal/format_reward/group_zero_std_frac": 0.7805555820465088, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01399468332529068, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01399468332529068, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019946877844631674, "signal/frontier_aurc_reward/group_std_mean": 0.0029552684631198646, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5704911351786e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5704911351786e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12816164940595626, "signal/frontier_coverage_1/group_std_mean": 0.18385762274265288, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002294093510136008, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002294093510136008, "signal/frontier_coverage_10/centered_abs_mean": 0.12816164940595626, "signal/frontier_coverage_10/group_std_mean": 0.18385762274265288, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002294093510136008, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002294093510136008, "signal/frontier_coverage_15/centered_abs_mean": 0.12816164940595626, "signal/frontier_coverage_15/group_std_mean": 0.18385762274265288, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002294093510136008, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002294093510136008, "signal/frontier_coverage_20/centered_abs_mean": 0.12816164940595626, "signal/frontier_coverage_20/group_std_mean": 0.18385762274265288, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002294093510136008, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002294093510136008, "signal/frontier_coverage_25/centered_abs_mean": 0.09580764919519424, "signal/frontier_coverage_25/group_std_mean": 0.13949446082115174, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001714956876821816, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001714956876821816, "signal/frontier_coverage_5/centered_abs_mean": 0.12816164940595626, "signal/frontier_coverage_5/group_std_mean": 0.18385762274265288, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002294093510136008, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002294093510136008, "signal/frontier_ece_reward/centered_abs_mean": 0.018544533848762514, "signal/frontier_ece_reward/group_std_mean": 0.02403351552784443, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002318066731095314, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002318066731095314, "step": 80 }, { "calibration/aurc": 0.23449193306557897, "calibration/batch_distribution_entropy": 0.8591898174593183, "calibration/buffer_distribution_entropy": 0.9250169156228909, "calibration/confidence_entropy": 0.43118254354529395, "calibration/coverage@0%": 0.013292608746136697, "calibration/coverage@1%": 0.013292608746136697, "calibration/coverage@10%": 0.0825480865481328, "calibration/coverage@15%": 0.18059930829873508, "calibration/coverage@20%": 0.2875147713491862, "calibration/coverage@25%": 0.6253215245244499, "calibration/coverage@30%": 0.8177006976439463, "calibration/coverage@5%": 0.02068047154297047, "calibration/ece": 0.12127799731225304, "calibration/mean_confidence": 0.6915215553744556, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01597222222222223, "completions/max_length": 3916.4, "completions/max_terminated_length": 3916.4, "completions/mean_length": 1137.9025146484375, "completions/mean_terminated_length": 1156.4914306640626, "completions/min_length": 0.0, "completions/min_terminated_length": 301.6, "epoch": 0.2039974500318746, "grad_norm": 0.00035721127642318606, "learning_rate": 3.7048192771084342e-06, "loss": -0.0138, "num_tokens": 247105860.0, "reward": 1.0467980623245239, "reward_std": 0.15480698943138121, "rewards/accuracy_reward": 0.675000011920929, "rewards/brier_reward": 0.7885265827178956, "rewards/confidence_uniqueness_reward": 0.9201976656913757, "rewards/format_reward": 0.9839409708976745, "rewards/frontier_aurc_reward": -0.0019012054428458214, "rewards/frontier_coverage_1": 0.018737619929015636, "rewards/frontier_coverage_10": 0.018737619929015636, "rewards/frontier_coverage_15": 0.018737619929015636, "rewards/frontier_coverage_20": 0.018737619929015636, "rewards/frontier_coverage_25": 0.020995143987238406, "rewards/frontier_coverage_5": 0.018737619929015636, "rewards/frontier_ece_reward": 0.013746128231287003, "signal/accuracy_reward/centered_abs_mean": 0.18850911557674407, "signal/accuracy_reward/group_std_mean": 0.243252757191658, "signal/accuracy_reward/group_zero_std_frac": 0.325, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09425455778837204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09425455778837204, "signal/advantage_abs_mean": 0.11363825798034669, "signal/advantage_pre_scale_abs_mean": 0.11363825798034669, "signal/advantage_pre_scale_std": 0.18625059127807617, "signal/advantage_std": 0.18625059127807617, "signal/brier_reward/centered_abs_mean": 0.17142007052898406, "signal/brier_reward/group_std_mean": 0.21921891272068023, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021427508816123007, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021427508816123007, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05906034857034683, "signal/confidence_uniqueness_reward/group_std_mean": 0.08878287822008132, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007382543571293354, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007382543571293354, "signal/format_reward/centered_abs_mean": 0.02738172747194767, "signal/format_reward/group_std_mean": 0.052156589925289154, "signal/format_reward/group_zero_std_frac": 0.7833333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013690863735973835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013690863735973835, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018494134768843652, "signal/frontier_aurc_reward/group_std_mean": 0.0028435390442609785, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.310450192657299e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.310450192657299e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15455899834632875, "signal/frontier_coverage_1/group_std_mean": 0.21880318522453307, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027666058391332625, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027666058391332625, "signal/frontier_coverage_10/centered_abs_mean": 0.15455899834632875, "signal/frontier_coverage_10/group_std_mean": 0.21880318522453307, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027666058391332625, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027666058391332625, "signal/frontier_coverage_15/centered_abs_mean": 0.15455899834632875, "signal/frontier_coverage_15/group_std_mean": 0.21880318522453307, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027666058391332625, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027666058391332625, "signal/frontier_coverage_20/centered_abs_mean": 0.15455899834632875, "signal/frontier_coverage_20/group_std_mean": 0.21880318522453307, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027666058391332625, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027666058391332625, "signal/frontier_coverage_25/centered_abs_mean": 0.10806576907634735, "signal/frontier_coverage_25/group_std_mean": 0.1546470195055008, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019343771506100892, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019343771506100892, "signal/frontier_coverage_5/centered_abs_mean": 0.15455899834632875, "signal/frontier_coverage_5/group_std_mean": 0.21880318522453307, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027666058391332625, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027666058391332625, "signal/frontier_ece_reward/centered_abs_mean": 0.01972369700670242, "signal/frontier_ece_reward/group_std_mean": 0.0256511677056551, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0024654621258378027, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0024654621258378027, "step": 85 }, { "calibration/aurc": 0.17547247118711584, "calibration/batch_distribution_entropy": 0.8324117665264612, "calibration/buffer_distribution_entropy": 0.9242962693143969, "calibration/confidence_entropy": 0.41812509388979313, "calibration/coverage@0%": 0.006274520525474006, "calibration/coverage@1%": 0.006274520525474006, "calibration/coverage@10%": 0.18623046036962448, "calibration/coverage@15%": 0.5280255901645126, "calibration/coverage@20%": 0.7003362423016254, "calibration/coverage@25%": 0.8467375352104126, "calibration/coverage@30%": 0.9238959714431413, "calibration/coverage@5%": 0.006274520525474006, "calibration/ece": 0.10362900900626756, "calibration/mean_confidence": 0.6853092687555817, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017708333333333305, "completions/max_length": 3909.4, "completions/max_terminated_length": 3909.4, "completions/mean_length": 1028.1794311523438, "completions/mean_terminated_length": 1046.72275390625, "completions/min_length": 0.0, "completions/min_terminated_length": 257.8, "epoch": 0.2159973000337496, "grad_norm": 0.00035051541635766625, "learning_rate": 3.5542168674698798e-06, "loss": -0.0144, "num_tokens": 262019159.0, "reward": 1.044158434867859, "reward_std": 0.15149846374988557, "rewards/accuracy_reward": 0.6759548544883728, "rewards/brier_reward": 0.7803074479103088, "rewards/confidence_uniqueness_reward": 0.9109542846679688, "rewards/format_reward": 0.9822916626930237, "rewards/frontier_aurc_reward": -0.002121423464268446, "rewards/frontier_coverage_1": 0.01945815598592162, "rewards/frontier_coverage_10": 0.01945815598592162, "rewards/frontier_coverage_15": 0.01945815598592162, "rewards/frontier_coverage_20": 0.01945815598592162, "rewards/frontier_coverage_25": 0.02471369504928589, "rewards/frontier_coverage_5": 0.01945815598592162, "rewards/frontier_ece_reward": 0.011852136347442865, "signal/accuracy_reward/centered_abs_mean": 0.1751681834459305, "signal/accuracy_reward/group_std_mean": 0.229813551902771, "signal/accuracy_reward/group_zero_std_frac": 0.3500000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08758409172296525, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08758409172296525, "signal/advantage_abs_mean": 0.11029749214649201, "signal/advantage_pre_scale_abs_mean": 0.11029749214649201, "signal/advantage_pre_scale_std": 0.1847362846136093, "signal/advantage_std": 0.1847362846136093, "signal/brier_reward/centered_abs_mean": 0.17710019648075104, "signal/brier_reward/group_std_mean": 0.225379142165184, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02213752456009388, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02213752456009388, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06355848461389542, "signal/confidence_uniqueness_reward/group_std_mean": 0.09241807758808136, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007944810576736927, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007944810576736927, "signal/format_reward/centered_abs_mean": 0.02744140662252903, "signal/format_reward/group_std_mean": 0.05012650415301323, "signal/format_reward/group_zero_std_frac": 0.800000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013720703311264515, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013720703311264515, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019779345020651817, "signal/frontier_aurc_reward/group_std_mean": 0.0030898852739483116, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5405028756940735e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5405028756940735e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15366960763931276, "signal/frontier_coverage_1/group_std_mean": 0.2199167400598526, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027506859973073007, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027506859973073007, "signal/frontier_coverage_10/centered_abs_mean": 0.15366960763931276, "signal/frontier_coverage_10/group_std_mean": 0.2199167400598526, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027506859973073007, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027506859973073007, "signal/frontier_coverage_15/centered_abs_mean": 0.15366960763931276, "signal/frontier_coverage_15/group_std_mean": 0.2199167400598526, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027506859973073007, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027506859973073007, "signal/frontier_coverage_20/centered_abs_mean": 0.15366960763931276, "signal/frontier_coverage_20/group_std_mean": 0.2199167400598526, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027506859973073007, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027506859973073007, "signal/frontier_coverage_25/centered_abs_mean": 0.10579841881990433, "signal/frontier_coverage_25/group_std_mean": 0.1524827867746353, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018937916029244661, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018937916029244661, "signal/frontier_coverage_5/centered_abs_mean": 0.15366960763931276, "signal/frontier_coverage_5/group_std_mean": 0.2199167400598526, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027506859973073007, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027506859973073007, "signal/frontier_ece_reward/centered_abs_mean": 0.019161980226635934, "signal/frontier_ece_reward/group_std_mean": 0.025356636941432954, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002395247528329492, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002395247528329492, "step": 90 }, { "calibration/aurc": 0.21844268779520587, "calibration/batch_distribution_entropy": 0.8795974563426677, "calibration/buffer_distribution_entropy": 0.9255697608305195, "calibration/confidence_entropy": 0.43751562875025146, "calibration/coverage@0%": 0.012130314557219657, "calibration/coverage@1%": 0.012130314557219657, "calibration/coverage@10%": 0.14825596900748145, "calibration/coverage@15%": 0.3928766365487585, "calibration/coverage@20%": 0.602737623456852, "calibration/coverage@25%": 0.6771237675480899, "calibration/coverage@30%": 0.7594675782497744, "calibration/coverage@5%": 0.03516696377187935, "calibration/ece": 0.13509608069340767, "calibration/mean_confidence": 0.6490796523758365, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012586805555555558, "completions/max_length": 3796.2, "completions/max_terminated_length": 3796.2, "completions/mean_length": 1077.1295166015625, "completions/mean_terminated_length": 1090.8811279296874, "completions/min_length": 0.0, "completions/min_terminated_length": 270.6, "epoch": 0.22799715003562457, "grad_norm": 0.0004233669606037438, "learning_rate": 3.4036144578313257e-06, "loss": -0.012, "num_tokens": 277519371.0, "reward": 1.0404303312301635, "reward_std": 0.14202898740768433, "rewards/accuracy_reward": 0.6548611044883728, "rewards/brier_reward": 0.7843343019485474, "rewards/confidence_uniqueness_reward": 0.9325709342956543, "rewards/format_reward": 0.9874131917953491, "rewards/frontier_aurc_reward": -0.001874490245245397, "rewards/frontier_coverage_1": 0.030402445048093796, "rewards/frontier_coverage_10": 0.030402445048093796, "rewards/frontier_coverage_15": 0.030402445048093796, "rewards/frontier_coverage_20": 0.030402445048093796, "rewards/frontier_coverage_25": 0.03526088930666447, "rewards/frontier_coverage_5": 0.030402445048093796, "rewards/frontier_ece_reward": 0.01089109033346176, "signal/accuracy_reward/centered_abs_mean": 0.16573350727558137, "signal/accuracy_reward/group_std_mean": 0.2209865093231201, "signal/accuracy_reward/group_zero_std_frac": 0.37222222685813905, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08286675363779068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08286675363779068, "signal/advantage_abs_mean": 0.10206114500761032, "signal/advantage_pre_scale_abs_mean": 0.10206114500761032, "signal/advantage_pre_scale_std": 0.17137506306171418, "signal/advantage_std": 0.17137506306171418, "signal/brier_reward/centered_abs_mean": 0.17366032898426056, "signal/brier_reward/group_std_mean": 0.2203914701938629, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02170754112303257, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02170754112303257, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04881677031517029, "signal/confidence_uniqueness_reward/group_std_mean": 0.07638464868068695, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006102096289396286, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006102096289396286, "signal/format_reward/centered_abs_mean": 0.022520615719258785, "signal/format_reward/group_std_mean": 0.04575785622000694, "signal/format_reward/group_zero_std_frac": 0.8027777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011260307859629393, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011260307859629393, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016592080472037196, "signal/frontier_aurc_reward/group_std_mean": 0.002632213244214654, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9699822334805503e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9699822334805503e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1714262396097183, "signal/frontier_coverage_1/group_std_mean": 0.23666555285453797, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030685296282172204, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030685296282172204, "signal/frontier_coverage_10/centered_abs_mean": 0.1714262396097183, "signal/frontier_coverage_10/group_std_mean": 0.23666555285453797, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030685296282172204, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030685296282172204, "signal/frontier_coverage_15/centered_abs_mean": 0.1714262396097183, "signal/frontier_coverage_15/group_std_mean": 0.23666555285453797, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030685296282172204, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030685296282172204, "signal/frontier_coverage_20/centered_abs_mean": 0.1714262396097183, "signal/frontier_coverage_20/group_std_mean": 0.23666555285453797, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030685296282172204, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030685296282172204, "signal/frontier_coverage_25/centered_abs_mean": 0.11292467564344406, "signal/frontier_coverage_25/group_std_mean": 0.15729531347751619, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020213516661897303, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020213516661897303, "signal/frontier_coverage_5/centered_abs_mean": 0.1714262396097183, "signal/frontier_coverage_5/group_std_mean": 0.23666555285453797, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030685296282172204, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030685296282172204, "signal/frontier_ece_reward/centered_abs_mean": 0.018675522133708, "signal/frontier_ece_reward/group_std_mean": 0.024996720254421234, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023344402667135, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023344402667135, "step": 95 }, { "calibration/aurc": 0.19269483529461973, "calibration/batch_distribution_entropy": 0.9275256037664482, "calibration/buffer_distribution_entropy": 0.926590680796312, "calibration/confidence_entropy": 0.447337380697335, "calibration/coverage@0%": 0.01488082526195049, "calibration/coverage@1%": 0.01488082526195049, "calibration/coverage@10%": 0.16575615682693723, "calibration/coverage@15%": 0.3623856419311955, "calibration/coverage@20%": 0.5321022617479968, "calibration/coverage@25%": 0.8242483211397911, "calibration/coverage@30%": 0.9162873399715504, "calibration/coverage@5%": 0.062095679373356326, "calibration/ece": 0.14211453397091672, "calibration/mean_confidence": 0.6130730250809763, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01675347222222221, "completions/max_length": 3890.2, "completions/max_terminated_length": 3890.2, "completions/mean_length": 1112.4980224609376, "completions/mean_terminated_length": 1131.3287841796875, "completions/min_length": 0.0, "completions/min_terminated_length": 300.8, "epoch": 0.23999700003749952, "grad_norm": 0.0003026402264367789, "learning_rate": 3.2530120481927713e-06, "loss": -0.0142, "num_tokens": 293434420.0, "reward": 1.0494256734848022, "reward_std": 0.14548470377922057, "rewards/accuracy_reward": 0.6754340291023254, "rewards/brier_reward": 0.7901706337928772, "rewards/confidence_uniqueness_reward": 0.9355661392211914, "rewards/format_reward": 0.9832465171813964, "rewards/frontier_aurc_reward": -0.0016389565775170923, "rewards/frontier_coverage_1": 0.026069404324516654, "rewards/frontier_coverage_10": 0.026069404324516654, "rewards/frontier_coverage_15": 0.026069404324516654, "rewards/frontier_coverage_20": 0.026069404324516654, "rewards/frontier_coverage_25": 0.03032403439283371, "rewards/frontier_coverage_5": 0.026069404324516654, "rewards/frontier_ece_reward": 0.012173208221793175, "signal/accuracy_reward/centered_abs_mean": 0.17470160722732545, "signal/accuracy_reward/group_std_mean": 0.229377481341362, "signal/accuracy_reward/group_zero_std_frac": 0.3500000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08735080361366272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08735080361366272, "signal/advantage_abs_mean": 0.10435573160648345, "signal/advantage_pre_scale_abs_mean": 0.10435573160648345, "signal/advantage_pre_scale_std": 0.17683197557926178, "signal/advantage_std": 0.17683197557926178, "signal/brier_reward/centered_abs_mean": 0.17027658522129058, "signal/brier_reward/group_std_mean": 0.21771090626716613, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021284573152661322, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021284573152661322, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04709148705005646, "signal/confidence_uniqueness_reward/group_std_mean": 0.07536679804325104, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0058864358812570575, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0058864358812570575, "signal/format_reward/centered_abs_mean": 0.02706705704331398, "signal/format_reward/group_std_mean": 0.05143220648169518, "signal/format_reward/group_zero_std_frac": 0.7888889074325561, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01353352852165699, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01353352852165699, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016033690189942718, "signal/frontier_aurc_reward/group_std_mean": 0.002564445650205016, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8700304392259567e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8700304392259567e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18503127694129945, "signal/frontier_coverage_1/group_std_mean": 0.2500757068395615, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003312059724703431, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003312059724703431, "signal/frontier_coverage_10/centered_abs_mean": 0.18503127694129945, "signal/frontier_coverage_10/group_std_mean": 0.2500757068395615, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003312059724703431, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003312059724703431, "signal/frontier_coverage_15/centered_abs_mean": 0.18503127694129945, "signal/frontier_coverage_15/group_std_mean": 0.2500757068395615, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003312059724703431, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003312059724703431, "signal/frontier_coverage_20/centered_abs_mean": 0.18503127694129945, "signal/frontier_coverage_20/group_std_mean": 0.2500757068395615, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003312059724703431, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003312059724703431, "signal/frontier_coverage_25/centered_abs_mean": 0.11901406049728394, "signal/frontier_coverage_25/group_std_mean": 0.16131974160671234, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002130351681262255, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002130351681262255, "signal/frontier_coverage_5/centered_abs_mean": 0.18503127694129945, "signal/frontier_coverage_5/group_std_mean": 0.2500757068395615, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003312059724703431, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003312059724703431, "signal/frontier_ece_reward/centered_abs_mean": 0.0202214565128088, "signal/frontier_ece_reward/group_std_mean": 0.026237889006733895, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025276820641011, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025276820641011, "step": 100 }, { "epoch": 0.23999700003749952, "eval_calibration/aurc": 0.14377964715819236, "eval_calibration/batch_distribution_entropy": 0.8031513733746348, "eval_calibration/buffer_distribution_entropy": 0.928078833996445, "eval_calibration/confidence_entropy": 0.39359989336584955, "eval_calibration/coverage@0%": 0.23172043010752688, "eval_calibration/coverage@1%": 0.23172043010752688, "eval_calibration/coverage@10%": 0.44854390681003586, "eval_calibration/coverage@15%": 0.6677643369175628, "eval_calibration/coverage@20%": 0.7693212365591399, "eval_calibration/coverage@25%": 0.8956989247311827, "eval_calibration/coverage@30%": 0.9555555555555556, "eval_calibration/coverage@5%": 0.23172043010752688, "eval_calibration/ece": 0.215171482975084, "eval_calibration/mean_confidence": 0.6850989023299228, "eval_completions/clipped_ratio": 0.021527777777777795, "eval_completions/max_length": 3111.1666666666665, "eval_completions/max_terminated_length": 3111.1666666666665, "eval_completions/mean_length": 1071.9295450846355, "eval_completions/mean_terminated_length": 1095.677001953125, "eval_completions/min_length": 0.0, "eval_completions/min_terminated_length": 372.3333333333333, "eval_loss": 0.0, "eval_num_tokens": 293434420.0, "eval_reward": 1.0389058788617451, "eval_reward_std": 0.2671206171313922, "eval_rewards/accuracy_reward": 0.671006957689921, "eval_rewards/brier_reward": 0.7895856300989786, "eval_rewards/confidence_uniqueness_reward": 0.8775778909524282, "eval_rewards/format_reward": 0.9800347288449606, "eval_rewards/frontier_aurc_reward": -0.001799254697592308, "eval_rewards/frontier_coverage_1": 0.030476751853711903, "eval_rewards/frontier_coverage_10": 0.030476751853711903, "eval_rewards/frontier_coverage_15": 0.030476751853711903, "eval_rewards/frontier_coverage_20": 0.030476751853711903, "eval_rewards/frontier_coverage_25": 0.039223356172442436, "eval_rewards/frontier_coverage_5": 0.030476751853711903, "eval_rewards/frontier_ece_reward": 0.012736255613466104, "eval_runtime": 218.0582, "eval_samples_per_second": 4.586, "eval_signal/accuracy_reward/centered_abs_mean": 0.4236653645833333, "eval_signal/accuracy_reward/group_std_mean": 0.4662252912918727, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21183268229166666, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21183268229166666, "eval_signal/advantage_abs_mean": 0.22447845339775085, "eval_signal/advantage_pre_scale_abs_mean": 0.22447845339775085, "eval_signal/advantage_pre_scale_std": 0.26788055896759033, "eval_signal/advantage_std": 0.26788055896759033, "eval_signal/brier_reward/centered_abs_mean": 0.24005225549141565, "eval_signal/brier_reward/group_std_mean": 0.2984655201435089, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030006531936426956, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.030006531936426956, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0669537124534448, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.11659604435165723, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0083692140566806, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0083692140566806, "eval_signal/format_reward/centered_abs_mean": 0.037923177083333336, "eval_signal/format_reward/group_std_mean": 0.09447787639995416, "eval_signal/format_reward/group_zero_std_frac": 0.5277777860562006, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.018961588541666668, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.018961588541666668, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0027151394557828703, "eval_signal/frontier_aurc_reward/group_std_mean": 0.004679826592716078, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8600995190402806e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8600995190402806e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.21269922455151877, "eval_signal/frontier_coverage_1/group_std_mean": 0.33839886883894604, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003807315952144563, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003807315952144563, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.21269922455151877, "eval_signal/frontier_coverage_10/group_std_mean": 0.33839886883894604, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003807315952144563, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003807315952144563, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.21269922455151877, "eval_signal/frontier_coverage_15/group_std_mean": 0.33839886883894604, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003807315952144563, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003807315952144563, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.21269922455151877, "eval_signal/frontier_coverage_20/group_std_mean": 0.33839886883894604, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003807315952144563, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003807315952144563, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.1346950319906076, "eval_signal/frontier_coverage_25/group_std_mean": 0.2107120007276535, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024110410595312715, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024110410595312715, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.21269922455151877, "eval_signal/frontier_coverage_5/group_std_mean": 0.33839886883894604, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003807315952144563, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003807315952144563, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.022523170647521813, "eval_signal/frontier_ece_reward/group_std_mean": 0.030609419258932274, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0028153963309402266, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0028153963309402266, "eval_steps_per_second": 0.028, "step": 100 }, { "calibration/aurc": 0.3135478311696943, "calibration/batch_distribution_entropy": 0.8595469934829717, "calibration/buffer_distribution_entropy": 0.9278512946088252, "calibration/confidence_entropy": 0.3974191602109486, "calibration/coverage@0%": 0.013829787234042554, "calibration/coverage@1%": 0.013829787234042554, "calibration/coverage@10%": 0.14787234042553193, "calibration/coverage@15%": 0.17220483938256154, "calibration/coverage@20%": 0.20299209956745323, "calibration/coverage@25%": 0.32088064824285356, "calibration/coverage@30%": 0.48956564348977183, "calibration/coverage@5%": 0.12340425531914893, "calibration/ece": 0.1898765576679796, "calibration/mean_confidence": 0.6748698600578947, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020486111111111094, "completions/max_length": 3933.4, "completions/max_terminated_length": 3933.4, "completions/mean_length": 1077.4388061523437, "completions/mean_terminated_length": 1100.0875732421875, "completions/min_length": 0.0, "completions/min_terminated_length": 255.6, "epoch": 0.2519968500393745, "grad_norm": 0.00034545789822004735, "learning_rate": 3.1024096385542172e-06, "loss": -0.0174, "num_tokens": 308923379.0, "reward": 1.04494047164917, "reward_std": 0.1398667186498642, "rewards/accuracy_reward": 0.6754340171813965, "rewards/brier_reward": 0.782523512840271, "rewards/confidence_uniqueness_reward": 0.9232323408126831, "rewards/format_reward": 0.9793402671813964, "rewards/frontier_aurc_reward": -0.0020483172265812755, "rewards/frontier_coverage_1": 0.024506374448537826, "rewards/frontier_coverage_10": 0.024506374448537826, "rewards/frontier_coverage_15": 0.024506374448537826, "rewards/frontier_coverage_20": 0.024506374448537826, "rewards/frontier_coverage_25": 0.028536751121282577, "rewards/frontier_coverage_5": 0.024506374448537826, "rewards/frontier_ece_reward": 0.013331157714128494, "signal/accuracy_reward/centered_abs_mean": 0.15463867038488388, "signal/accuracy_reward/group_std_mean": 0.20896627902984619, "signal/accuracy_reward/group_zero_std_frac": 0.39722222089767456, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07731933519244194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07731933519244194, "signal/advantage_abs_mean": 0.0999849259853363, "signal/advantage_pre_scale_abs_mean": 0.0999849259853363, "signal/advantage_pre_scale_std": 0.17430230379104614, "signal/advantage_std": 0.17430230379104614, "signal/brier_reward/centered_abs_mean": 0.16766727864742278, "signal/brier_reward/group_std_mean": 0.21422589123249053, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020958409830927848, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020958409830927848, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05548132359981537, "signal/confidence_uniqueness_reward/group_std_mean": 0.0828926458954811, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006935165449976921, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006935165449976921, "signal/format_reward/centered_abs_mean": 0.03021918348968029, "signal/format_reward/group_std_mean": 0.05192293673753738, "signal/format_reward/group_zero_std_frac": 0.8, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.015109591744840145, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.015109591744840145, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020640650764107704, "signal/frontier_aurc_reward/group_std_mean": 0.0032852147705852985, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6946763793821445e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6946763793821445e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16248934268951415, "signal/frontier_coverage_1/group_std_mean": 0.22402643859386445, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002908559050410986, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002908559050410986, "signal/frontier_coverage_10/centered_abs_mean": 0.16248934268951415, "signal/frontier_coverage_10/group_std_mean": 0.22402643859386445, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002908559050410986, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002908559050410986, "signal/frontier_coverage_15/centered_abs_mean": 0.16248934268951415, "signal/frontier_coverage_15/group_std_mean": 0.22402643859386445, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002908559050410986, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002908559050410986, "signal/frontier_coverage_20/centered_abs_mean": 0.16248934268951415, "signal/frontier_coverage_20/group_std_mean": 0.22402643859386445, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002908559050410986, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002908559050410986, "signal/frontier_coverage_25/centered_abs_mean": 0.10017142742872238, "signal/frontier_coverage_25/group_std_mean": 0.13701665103435517, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017930685309693218, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017930685309693218, "signal/frontier_coverage_5/centered_abs_mean": 0.16248934268951415, "signal/frontier_coverage_5/group_std_mean": 0.22402643859386445, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002908559050410986, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002908559050410986, "signal/frontier_ece_reward/centered_abs_mean": 0.020253218337893487, "signal/frontier_ece_reward/group_std_mean": 0.025626911595463753, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002531652292236686, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002531652292236686, "step": 105 }, { "calibration/aurc": 0.18315490609084778, "calibration/batch_distribution_entropy": 0.7765032894890078, "calibration/buffer_distribution_entropy": 0.9242474732058756, "calibration/confidence_entropy": 0.3775560392498729, "calibration/coverage@0%": 0.06752858081031457, "calibration/coverage@1%": 0.0759496334418935, "calibration/coverage@10%": 0.28782259073587924, "calibration/coverage@15%": 0.36077427800708595, "calibration/coverage@20%": 0.5508072448753857, "calibration/coverage@25%": 0.6846715225109482, "calibration/coverage@30%": 0.7913718321314601, "calibration/coverage@5%": 0.23705855547960814, "calibration/ece": 0.14042555442163512, "calibration/mean_confidence": 0.7471171009304948, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014670138888888884, "completions/max_length": 3876.6, "completions/max_terminated_length": 3876.6, "completions/mean_length": 1131.8022705078124, "completions/mean_terminated_length": 1148.7941162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 295.6, "epoch": 0.2639967000412495, "grad_norm": 0.00035962072433903813, "learning_rate": 2.9518072289156627e-06, "loss": -0.0127, "num_tokens": 325070189.0, "reward": 1.0619823932647705, "reward_std": 0.14728475213050843, "rewards/accuracy_reward": 0.6998263955116272, "rewards/brier_reward": 0.8028295993804931, "rewards/confidence_uniqueness_reward": 0.9204535126686096, "rewards/format_reward": 0.9849826335906983, "rewards/frontier_aurc_reward": -0.002051501488313079, "rewards/frontier_coverage_1": 0.020819610450416803, "rewards/frontier_coverage_10": 0.020819610450416803, "rewards/frontier_coverage_15": 0.020819610450416803, "rewards/frontier_coverage_20": 0.020819610450416803, "rewards/frontier_coverage_25": 0.03544456548988819, "rewards/frontier_coverage_5": 0.020819610450416803, "rewards/frontier_ece_reward": 0.01365122813731432, "signal/accuracy_reward/centered_abs_mean": 0.16985676884651185, "signal/accuracy_reward/group_std_mean": 0.22959282100200654, "signal/accuracy_reward/group_zero_std_frac": 0.32777778506278993, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08492838442325593, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08492838442325593, "signal/advantage_abs_mean": 0.10342257767915726, "signal/advantage_pre_scale_abs_mean": 0.10342257767915726, "signal/advantage_pre_scale_std": 0.18322791755199433, "signal/advantage_std": 0.18322791755199433, "signal/brier_reward/centered_abs_mean": 0.1463008463382721, "signal/brier_reward/group_std_mean": 0.19255680441856385, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01828760579228401, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01828760579228401, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0545014426112175, "signal/confidence_uniqueness_reward/group_std_mean": 0.08425513207912445, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006812680326402187, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006812680326402187, "signal/format_reward/centered_abs_mean": 0.0259168840944767, "signal/format_reward/group_std_mean": 0.050569449365139005, "signal/format_reward/group_zero_std_frac": 0.7833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01295844204723835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01295844204723835, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021926365327090023, "signal/frontier_aurc_reward/group_std_mean": 0.003540871059522033, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.924819320673123e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.924819320673123e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1195900097489357, "signal/frontier_coverage_1/group_std_mean": 0.17233213186264038, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021406610263511538, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021406610263511538, "signal/frontier_coverage_10/centered_abs_mean": 0.1195900097489357, "signal/frontier_coverage_10/group_std_mean": 0.17233213186264038, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021406610263511538, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021406610263511538, "signal/frontier_coverage_15/centered_abs_mean": 0.1195900097489357, "signal/frontier_coverage_15/group_std_mean": 0.17233213186264038, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021406610263511538, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021406610263511538, "signal/frontier_coverage_20/centered_abs_mean": 0.1195900097489357, "signal/frontier_coverage_20/group_std_mean": 0.17233213186264038, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021406610263511538, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021406610263511538, "signal/frontier_coverage_25/centered_abs_mean": 0.07210812345147133, "signal/frontier_coverage_25/group_std_mean": 0.10068325251340866, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012907354161143304, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012907354161143304, "signal/frontier_coverage_5/centered_abs_mean": 0.1195900097489357, "signal/frontier_coverage_5/group_std_mean": 0.17233213186264038, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021406610263511538, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021406610263511538, "signal/frontier_ece_reward/centered_abs_mean": 0.0171145960688591, "signal/frontier_ece_reward/group_std_mean": 0.021544494852423667, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021393245086073877, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021393245086073877, "step": 110 }, { "calibration/aurc": 0.29120583351889173, "calibration/batch_distribution_entropy": 0.7863150995666791, "calibration/buffer_distribution_entropy": 0.9207539489185862, "calibration/confidence_entropy": 0.4187937868082977, "calibration/coverage@0%": 0.0075278531686347825, "calibration/coverage@1%": 0.0075278531686347825, "calibration/coverage@10%": 0.06411135680169563, "calibration/coverage@15%": 0.1510769604728217, "calibration/coverage@20%": 0.3940149695408244, "calibration/coverage@25%": 0.6425000196232538, "calibration/coverage@30%": 0.6874048087618722, "calibration/coverage@5%": 0.0075278531686347825, "calibration/ece": 0.18694521114390877, "calibration/mean_confidence": 0.748259641465957, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01684027777777779, "completions/max_length": 3995.0, "completions/max_terminated_length": 3995.0, "completions/mean_length": 1128.9096435546876, "completions/mean_terminated_length": 1148.427880859375, "completions/min_length": 0.0, "completions/min_terminated_length": 324.2, "epoch": 0.27599655004312446, "grad_norm": 0.00030474571394734085, "learning_rate": 2.8012048192771087e-06, "loss": -0.0141, "num_tokens": 341154428.0, "reward": 1.0459946870803833, "reward_std": 0.14044857025146484, "rewards/accuracy_reward": 0.6740451335906983, "rewards/brier_reward": 0.7880466699600219, "rewards/confidence_uniqueness_reward": 0.9210368752479553, "rewards/format_reward": 0.9829861044883728, "rewards/frontier_aurc_reward": -0.002286965842358768, "rewards/frontier_coverage_1": 0.022654338832944633, "rewards/frontier_coverage_10": 0.022654338832944633, "rewards/frontier_coverage_15": 0.022654338832944633, "rewards/frontier_coverage_20": 0.022654338832944633, "rewards/frontier_coverage_25": 0.030701416730880737, "rewards/frontier_coverage_5": 0.022654338832944633, "rewards/frontier_ece_reward": 0.010459364019334316, "signal/accuracy_reward/centered_abs_mean": 0.16135525107383727, "signal/accuracy_reward/group_std_mean": 0.21080092787742616, "signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08067762553691864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08067762553691864, "signal/advantage_abs_mean": 0.10324146151542664, "signal/advantage_pre_scale_abs_mean": 0.10324146151542664, "signal/advantage_pre_scale_std": 0.17995203137397767, "signal/advantage_std": 0.17995203137397767, "signal/brier_reward/centered_abs_mean": 0.1440261572599411, "signal/brier_reward/group_std_mean": 0.1866879642009735, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01800326965749264, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01800326965749264, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0526352696120739, "signal/confidence_uniqueness_reward/group_std_mean": 0.07682174444198608, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006579408701509237, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006579408701509237, "signal/format_reward/centered_abs_mean": 0.02632378451526165, "signal/format_reward/group_std_mean": 0.045665005967020986, "signal/format_reward/group_zero_std_frac": 0.8277777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013161892257630824, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013161892257630824, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021879581967368723, "signal/frontier_aurc_reward/group_std_mean": 0.0034705805126577617, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9164449844975024e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9164449844975024e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.10926453918218612, "signal/frontier_coverage_1/group_std_mean": 0.15879356861114502, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019558351254090668, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019558351254090668, "signal/frontier_coverage_10/centered_abs_mean": 0.10926453918218612, "signal/frontier_coverage_10/group_std_mean": 0.15879356861114502, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019558351254090668, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019558351254090668, "signal/frontier_coverage_15/centered_abs_mean": 0.10926453918218612, "signal/frontier_coverage_15/group_std_mean": 0.15879356861114502, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019558351254090668, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019558351254090668, "signal/frontier_coverage_20/centered_abs_mean": 0.10926453918218612, "signal/frontier_coverage_20/group_std_mean": 0.15879356861114502, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019558351254090668, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019558351254090668, "signal/frontier_coverage_25/centered_abs_mean": 0.06192091777920723, "signal/frontier_coverage_25/group_std_mean": 0.08779880106449127, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001108384388498962, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001108384388498962, "signal/frontier_coverage_5/centered_abs_mean": 0.10926453918218612, "signal/frontier_coverage_5/group_std_mean": 0.15879356861114502, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019558351254090668, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019558351254090668, "signal/frontier_ece_reward/centered_abs_mean": 0.015209457091987132, "signal/frontier_ece_reward/group_std_mean": 0.019499244540929793, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019011821364983915, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019011821364983915, "step": 115 }, { "calibration/aurc": 0.29482160200693, "calibration/batch_distribution_entropy": 0.8057582916578456, "calibration/buffer_distribution_entropy": 0.9189735844921059, "calibration/confidence_entropy": 0.41709737555410004, "calibration/coverage@0%": 0.028767226550989223, "calibration/coverage@1%": 0.028767226550989223, "calibration/coverage@10%": 0.11131487154184057, "calibration/coverage@15%": 0.23079975514591305, "calibration/coverage@20%": 0.31510173209402237, "calibration/coverage@25%": 0.5533219287673521, "calibration/coverage@30%": 0.5847659094969149, "calibration/coverage@5%": 0.053310307490937, "calibration/ece": 0.16388524171934898, "calibration/mean_confidence": 0.7294418950441234, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 4023.4, "completions/max_terminated_length": 4023.4, "completions/mean_length": 1133.4726806640624, "completions/mean_terminated_length": 1146.8188232421876, "completions/min_length": 0.0, "completions/min_terminated_length": 325.2, "epoch": 0.28799640004499943, "grad_norm": 0.00026670683291740716, "learning_rate": 2.6506024096385547e-06, "loss": -0.0099, "num_tokens": 357293889.0, "reward": 1.055515742301941, "reward_std": 0.13213830143213273, "rewards/accuracy_reward": 0.6822048664093018, "rewards/brier_reward": 0.7987765789031982, "rewards/confidence_uniqueness_reward": 0.9297965288162231, "rewards/format_reward": 0.9881076335906982, "rewards/frontier_aurc_reward": -0.0020097248489037156, "rewards/frontier_coverage_1": 0.027725940570235252, "rewards/frontier_coverage_10": 0.027725940570235252, "rewards/frontier_coverage_15": 0.027725940570235252, "rewards/frontier_coverage_20": 0.027725940570235252, "rewards/frontier_coverage_25": 0.031627381592988967, "rewards/frontier_coverage_5": 0.027725940570235252, "rewards/frontier_ece_reward": 0.010209777392446995, "signal/accuracy_reward/centered_abs_mean": 0.15929362177848816, "signal/accuracy_reward/group_std_mean": 0.20812316238880157, "signal/accuracy_reward/group_zero_std_frac": 0.41666666865348817, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07964681088924408, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07964681088924408, "signal/advantage_abs_mean": 0.09782680720090867, "signal/advantage_pre_scale_abs_mean": 0.09782680720090867, "signal/advantage_pre_scale_std": 0.1714523106813431, "signal/advantage_std": 0.1714523106813431, "signal/brier_reward/centered_abs_mean": 0.14243703782558442, "signal/brier_reward/group_std_mean": 0.1831719845533371, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017804629728198053, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017804629728198053, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.043136756867170334, "signal/confidence_uniqueness_reward/group_std_mean": 0.06378482431173324, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005392094608396292, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005392094608396292, "signal/format_reward/centered_abs_mean": 0.01975368931889534, "signal/format_reward/group_std_mean": 0.03594511151313782, "signal/format_reward/group_zero_std_frac": 0.8555555462837219, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00987684465944767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00987684465944767, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019732348155230284, "signal/frontier_aurc_reward/group_std_mean": 0.003135677380487323, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.532090340740979e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.532090340740979e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.11823989003896714, "signal/frontier_coverage_1/group_std_mean": 0.16819217205047607, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021164938574656845, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021164938574656845, "signal/frontier_coverage_10/centered_abs_mean": 0.11823989003896714, "signal/frontier_coverage_10/group_std_mean": 0.16819217205047607, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021164938574656845, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021164938574656845, "signal/frontier_coverage_15/centered_abs_mean": 0.11823989003896714, "signal/frontier_coverage_15/group_std_mean": 0.16819217205047607, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021164938574656845, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021164938574656845, "signal/frontier_coverage_20/centered_abs_mean": 0.11823989003896714, "signal/frontier_coverage_20/group_std_mean": 0.16819217205047607, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021164938574656845, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021164938574656845, "signal/frontier_coverage_25/centered_abs_mean": 0.058430235087871554, "signal/frontier_coverage_25/group_std_mean": 0.08083060085773468, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010459011886268855, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010459011886268855, "signal/frontier_coverage_5/centered_abs_mean": 0.11823989003896714, "signal/frontier_coverage_5/group_std_mean": 0.16819217205047607, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021164938574656845, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021164938574656845, "signal/frontier_ece_reward/centered_abs_mean": 0.015391989797353744, "signal/frontier_ece_reward/group_std_mean": 0.019800475612282754, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001923998724669218, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001923998724669218, "step": 120 }, { "calibration/aurc": 0.16268769554340473, "calibration/batch_distribution_entropy": 0.7832053317397112, "calibration/buffer_distribution_entropy": 0.9172995538320976, "calibration/confidence_entropy": 0.4107882901625729, "calibration/coverage@0%": 0.014583333333333332, "calibration/coverage@1%": 0.014583333333333332, "calibration/coverage@10%": 0.337894364664926, "calibration/coverage@15%": 0.412960021052384, "calibration/coverage@20%": 0.7071476371933291, "calibration/coverage@25%": 0.8396603979959071, "calibration/coverage@30%": 0.9393918918918919, "calibration/coverage@5%": 0.1550397084421236, "calibration/ece": 0.09877240124714463, "calibration/mean_confidence": 0.7348249590106904, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010243055555555537, "completions/max_length": 3864.6, "completions/max_terminated_length": 3864.6, "completions/mean_length": 1138.81005859375, "completions/mean_terminated_length": 1150.768701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 324.8, "epoch": 0.2999962500468744, "grad_norm": 0.0003056655405089259, "learning_rate": 2.5e-06, "loss": -0.0101, "num_tokens": 373530645.0, "reward": 1.0652480840682983, "reward_std": 0.1264243721961975, "rewards/accuracy_reward": 0.6951388955116272, "rewards/brier_reward": 0.8122022986412049, "rewards/confidence_uniqueness_reward": 0.9318351149559021, "rewards/format_reward": 0.9897569417953491, "rewards/frontier_aurc_reward": -0.0017485103104263543, "rewards/frontier_coverage_1": 0.03179278327152133, "rewards/frontier_coverage_10": 0.03179278327152133, "rewards/frontier_coverage_15": 0.03179278327152133, "rewards/frontier_coverage_20": 0.03179278327152133, "rewards/frontier_coverage_25": 0.03835425637662411, "rewards/frontier_coverage_5": 0.03179278327152133, "rewards/frontier_ece_reward": 0.010358355939388275, "signal/accuracy_reward/centered_abs_mean": 0.15129123330116273, "signal/accuracy_reward/group_std_mean": 0.20266271233558655, "signal/accuracy_reward/group_zero_std_frac": 0.4166666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07564561665058137, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07564561665058137, "signal/advantage_abs_mean": 0.09205293953418732, "signal/advantage_pre_scale_abs_mean": 0.09205293953418732, "signal/advantage_pre_scale_std": 0.16289040446281433, "signal/advantage_std": 0.16289040446281433, "signal/brier_reward/centered_abs_mean": 0.1405400887131691, "signal/brier_reward/group_std_mean": 0.18279159367084502, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017567511089146136, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017567511089146136, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04126947373151779, "signal/confidence_uniqueness_reward/group_std_mean": 0.06011983305215836, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005158684216439724, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005158684216439724, "signal/format_reward/centered_abs_mean": 0.01725260429084301, "signal/format_reward/group_std_mean": 0.03177933469414711, "signal/format_reward/group_zero_std_frac": 0.8722222328186036, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008626302145421505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008626302145421505, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018054211512207984, "signal/frontier_aurc_reward/group_std_mean": 0.0029377146624028684, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.231703594792634e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.231703594792634e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12659138143062593, "signal/frontier_coverage_1/group_std_mean": 0.18150453865528107, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022659855661913753, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022659855661913753, "signal/frontier_coverage_10/centered_abs_mean": 0.12659138143062593, "signal/frontier_coverage_10/group_std_mean": 0.18150453865528107, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022659855661913753, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022659855661913753, "signal/frontier_coverage_15/centered_abs_mean": 0.12659138143062593, "signal/frontier_coverage_15/group_std_mean": 0.18150453865528107, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022659855661913753, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022659855661913753, "signal/frontier_coverage_20/centered_abs_mean": 0.12659138143062593, "signal/frontier_coverage_20/group_std_mean": 0.18150453865528107, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022659855661913753, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022659855661913753, "signal/frontier_coverage_25/centered_abs_mean": 0.06125762164592743, "signal/frontier_coverage_25/group_std_mean": 0.08552575558423996, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001096511399373412, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001096511399373412, "signal/frontier_coverage_5/centered_abs_mean": 0.12659138143062593, "signal/frontier_coverage_5/group_std_mean": 0.18150453865528107, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022659855661913753, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022659855661913753, "signal/frontier_ece_reward/centered_abs_mean": 0.01536604668945074, "signal/frontier_ece_reward/group_std_mean": 0.019951780140399934, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019207558361813426, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019207558361813426, "step": 125 }, { "calibration/aurc": 0.21656307838282568, "calibration/batch_distribution_entropy": 0.8418151261521143, "calibration/buffer_distribution_entropy": 0.9155357951937564, "calibration/confidence_entropy": 0.4046419839826144, "calibration/coverage@0%": 0.031999336915829756, "calibration/coverage@1%": 0.031999336915829756, "calibration/coverage@10%": 0.26968682939463356, "calibration/coverage@15%": 0.3515532872272156, "calibration/coverage@20%": 0.4407191523101236, "calibration/coverage@25%": 0.5056258584505888, "calibration/coverage@30%": 0.8617951969687244, "calibration/coverage@5%": 0.03622097279973477, "calibration/ece": 0.10678372431681085, "calibration/mean_confidence": 0.6637441296139539, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017187499999999977, "completions/max_length": 3903.2, "completions/max_terminated_length": 3903.2, "completions/mean_length": 1170.849853515625, "completions/mean_terminated_length": 1191.54580078125, "completions/min_length": 0.0, "completions/min_terminated_length": 298.4, "epoch": 0.3119961000487494, "grad_norm": 0.0003094042185693979, "learning_rate": 2.349397590361446e-06, "loss": -0.0146, "num_tokens": 390143635.0, "reward": 1.0414445877075196, "reward_std": 0.14395586848258973, "rewards/accuracy_reward": 0.6592013955116272, "rewards/brier_reward": 0.7887712478637695, "rewards/confidence_uniqueness_reward": 0.9302976727485657, "rewards/format_reward": 0.9826388955116272, "rewards/frontier_aurc_reward": -0.001909919991157949, "rewards/frontier_coverage_1": 0.04098189903888851, "rewards/frontier_coverage_10": 0.04098189903888851, "rewards/frontier_coverage_15": 0.04098189903888851, "rewards/frontier_coverage_20": 0.04098189903888851, "rewards/frontier_coverage_25": 0.04001305103302002, "rewards/frontier_coverage_5": 0.04098189903888851, "rewards/frontier_ece_reward": 0.010327290836721658, "signal/accuracy_reward/centered_abs_mean": 0.1720269113779068, "signal/accuracy_reward/group_std_mean": 0.22580362856388092, "signal/accuracy_reward/group_zero_std_frac": 0.3666666686534882, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0860134556889534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0860134556889534, "signal/advantage_abs_mean": 0.10680273771286011, "signal/advantage_pre_scale_abs_mean": 0.10680273771286011, "signal/advantage_pre_scale_std": 0.17836588323116304, "signal/advantage_std": 0.17836588323116304, "signal/brier_reward/centered_abs_mean": 0.1629619389772415, "signal/brier_reward/group_std_mean": 0.21055279076099395, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02037024237215519, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02037024237215519, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.045919667929410934, "signal/confidence_uniqueness_reward/group_std_mean": 0.06701685413718224, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005739958491176367, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005739958491176367, "signal/format_reward/centered_abs_mean": 0.026041666232049464, "signal/format_reward/group_std_mean": 0.043287623673677444, "signal/format_reward/group_zero_std_frac": 0.8361111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013020833116024732, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013020833116024732, "signal/frontier_aurc_reward/centered_abs_mean": 0.001999038900248706, "signal/frontier_aurc_reward/group_std_mean": 0.003307389048859477, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5782793565886097e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5782793565886097e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15618787109851837, "signal/frontier_coverage_1/group_std_mean": 0.21958717703819275, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002795762661844492, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002795762661844492, "signal/frontier_coverage_10/centered_abs_mean": 0.15618787109851837, "signal/frontier_coverage_10/group_std_mean": 0.21958717703819275, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002795762661844492, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002795762661844492, "signal/frontier_coverage_15/centered_abs_mean": 0.15618787109851837, "signal/frontier_coverage_15/group_std_mean": 0.21958717703819275, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002795762661844492, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002795762661844492, "signal/frontier_coverage_20/centered_abs_mean": 0.15618787109851837, "signal/frontier_coverage_20/group_std_mean": 0.21958717703819275, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002795762661844492, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002795762661844492, "signal/frontier_coverage_25/centered_abs_mean": 0.07262331247329712, "signal/frontier_coverage_25/group_std_mean": 0.10001310557127, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001299957255832851, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001299957255832851, "signal/frontier_coverage_5/centered_abs_mean": 0.15618787109851837, "signal/frontier_coverage_5/group_std_mean": 0.21958717703819275, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002795762661844492, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002795762661844492, "signal/frontier_ece_reward/centered_abs_mean": 0.01741938292980194, "signal/frontier_ece_reward/group_std_mean": 0.022634774819016455, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021774228662252426, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021774228662252426, "step": 130 }, { "calibration/aurc": 0.23023647946494208, "calibration/batch_distribution_entropy": 0.7881558617702, "calibration/buffer_distribution_entropy": 0.9147557758253775, "calibration/confidence_entropy": 0.36381260329450316, "calibration/coverage@0%": 0.009973753280839895, "calibration/coverage@1%": 0.009973753280839895, "calibration/coverage@10%": 0.1994750656167979, "calibration/coverage@15%": 0.34993662280701754, "calibration/coverage@20%": 0.4895458333333333, "calibration/coverage@25%": 0.6612416666666667, "calibration/coverage@30%": 0.7436381578947369, "calibration/coverage@5%": 0.1853018372703412, "calibration/ece": 0.16311325265378912, "calibration/mean_confidence": 0.7145244028217494, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011979166666666674, "completions/max_length": 3853.2, "completions/max_terminated_length": 3853.2, "completions/mean_length": 1161.9842041015625, "completions/mean_terminated_length": 1176.113623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 273.6, "epoch": 0.32399595005062437, "grad_norm": 0.000337834469974041, "learning_rate": 2.1987951807228917e-06, "loss": -0.0106, "num_tokens": 406622717.0, "reward": 1.0532086849212647, "reward_std": 0.1362074300646782, "rewards/accuracy_reward": 0.6723958373069763, "rewards/brier_reward": 0.7999706029891968, "rewards/confidence_uniqueness_reward": 0.9347299575805664, "rewards/format_reward": 0.9876736044883728, "rewards/frontier_aurc_reward": -0.001792767085134983, "rewards/frontier_coverage_1": 0.04573216512799263, "rewards/frontier_coverage_10": 0.04573216512799263, "rewards/frontier_coverage_15": 0.04573216512799263, "rewards/frontier_coverage_20": 0.04573216512799263, "rewards/frontier_coverage_25": 0.04576835259795189, "rewards/frontier_coverage_5": 0.04573216512799263, "rewards/frontier_ece_reward": 0.011649912409484386, "signal/accuracy_reward/centered_abs_mean": 0.1695746511220932, "signal/accuracy_reward/group_std_mean": 0.2238933861255646, "signal/accuracy_reward/group_zero_std_frac": 0.3611111044883728, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0847873255610466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0847873255610466, "signal/advantage_abs_mean": 0.09791278541088104, "signal/advantage_pre_scale_abs_mean": 0.09791278541088104, "signal/advantage_pre_scale_std": 0.16925244629383088, "signal/advantage_std": 0.16925244629383088, "signal/brier_reward/centered_abs_mean": 0.1602795511484146, "signal/brier_reward/group_std_mean": 0.2083508402109146, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020034943893551826, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020034943893551826, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04163586869835854, "signal/confidence_uniqueness_reward/group_std_mean": 0.06607603505253792, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005204483587294817, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005204483587294817, "signal/format_reward/centered_abs_mean": 0.02120225690305233, "signal/format_reward/group_std_mean": 0.04186696708202362, "signal/format_reward/group_zero_std_frac": 0.819444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010601128451526165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010601128451526165, "signal/frontier_aurc_reward/centered_abs_mean": 0.001877031335607171, "signal/frontier_aurc_reward/group_std_mean": 0.002974188607186079, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.359886177349836e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.359886177349836e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1724255710840225, "signal/frontier_coverage_1/group_std_mean": 0.2394712746143341, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003086417680606246, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003086417680606246, "signal/frontier_coverage_10/centered_abs_mean": 0.1724255710840225, "signal/frontier_coverage_10/group_std_mean": 0.2394712746143341, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003086417680606246, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003086417680606246, "signal/frontier_coverage_15/centered_abs_mean": 0.1724255710840225, "signal/frontier_coverage_15/group_std_mean": 0.2394712746143341, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003086417680606246, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003086417680606246, "signal/frontier_coverage_20/centered_abs_mean": 0.1724255710840225, "signal/frontier_coverage_20/group_std_mean": 0.2394712746143341, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003086417680606246, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003086417680606246, "signal/frontier_coverage_25/centered_abs_mean": 0.07950729578733444, "signal/frontier_coverage_25/group_std_mean": 0.10724284201860428, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001423180545680225, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001423180545680225, "signal/frontier_coverage_5/centered_abs_mean": 0.1724255710840225, "signal/frontier_coverage_5/group_std_mean": 0.2394712746143341, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003086417680606246, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003086417680606246, "signal/frontier_ece_reward/centered_abs_mean": 0.01801157519221306, "signal/frontier_ece_reward/group_std_mean": 0.023027915880084036, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022514468990266325, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022514468990266325, "step": 135 }, { "calibration/aurc": 0.1906562783077283, "calibration/batch_distribution_entropy": 0.8116671180904721, "calibration/buffer_distribution_entropy": 0.9134489331110502, "calibration/confidence_entropy": 0.3747389081244313, "calibration/coverage@0%": 0.0015665796344647518, "calibration/coverage@1%": 0.0015665796344647518, "calibration/coverage@10%": 0.14696577713433928, "calibration/coverage@15%": 0.3754393639553163, "calibration/coverage@20%": 0.5833659052356869, "calibration/coverage@25%": 0.7757019471645958, "calibration/coverage@30%": 0.9518546877147177, "calibration/coverage@5%": 0.08167410651618517, "calibration/ece": 0.14092391131055967, "calibration/mean_confidence": 0.7090212503403477, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010156249999999978, "completions/max_length": 3922.8, "completions/max_terminated_length": 3922.8, "completions/mean_length": 1160.94765625, "completions/mean_terminated_length": 1172.8401611328125, "completions/min_length": 0.0, "completions/min_terminated_length": 344.0, "epoch": 0.33599580005249935, "grad_norm": 0.00027862933347932994, "learning_rate": 2.0481927710843377e-06, "loss": -0.0091, "num_tokens": 423101058.0, "reward": 1.0563385009765625, "reward_std": 0.12681164890527724, "rewards/accuracy_reward": 0.6782986044883728, "rewards/brier_reward": 0.7978750586509704, "rewards/confidence_uniqueness_reward": 0.9353888034820557, "rewards/format_reward": 0.9897569417953491, "rewards/frontier_aurc_reward": -0.001826054509729147, "rewards/frontier_coverage_1": 0.0388026436092332, "rewards/frontier_coverage_10": 0.0388026436092332, "rewards/frontier_coverage_15": 0.0388026436092332, "rewards/frontier_coverage_20": 0.0373392676236108, "rewards/frontier_coverage_25": 0.04751555323600769, "rewards/frontier_coverage_5": 0.0388026436092332, "rewards/frontier_ece_reward": 0.011105910316109658, "signal/accuracy_reward/centered_abs_mean": 0.15526258647441865, "signal/accuracy_reward/group_std_mean": 0.2096972107887268, "signal/accuracy_reward/group_zero_std_frac": 0.3833333313465118, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07763129323720933, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07763129323720933, "signal/advantage_abs_mean": 0.09159746617078782, "signal/advantage_pre_scale_abs_mean": 0.09159746617078782, "signal/advantage_pre_scale_std": 0.1599065124988556, "signal/advantage_std": 0.1599065124988556, "signal/brier_reward/centered_abs_mean": 0.15635252892971038, "signal/brier_reward/group_std_mean": 0.20334820151329042, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019544066116213798, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.019544066116213798, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.039848759025335315, "signal/confidence_uniqueness_reward/group_std_mean": 0.05892389565706253, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004981094878166914, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004981094878166914, "signal/format_reward/centered_abs_mean": 0.01740451380610466, "signal/format_reward/group_std_mean": 0.03113800659775734, "signal/format_reward/group_zero_std_frac": 0.8777777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00870225690305233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00870225690305233, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019128842512145638, "signal/frontier_aurc_reward/group_std_mean": 0.00324101191945374, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.424062597332522e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.424062597332522e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16529696583747863, "signal/frontier_coverage_1/group_std_mean": 0.2307574212551117, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029588155448436737, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029588155448436737, "signal/frontier_coverage_10/centered_abs_mean": 0.16529696583747863, "signal/frontier_coverage_10/group_std_mean": 0.2307574212551117, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029588155448436737, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029588155448436737, "signal/frontier_coverage_15/centered_abs_mean": 0.16529696583747863, "signal/frontier_coverage_15/group_std_mean": 0.2307574212551117, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029588155448436737, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029588155448436737, "signal/frontier_coverage_20/centered_abs_mean": 0.1624933660030365, "signal/frontier_coverage_20/group_std_mean": 0.22703517079353333, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00290863118134439, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00290863118134439, "signal/frontier_coverage_25/centered_abs_mean": 0.07454123198986054, "signal/frontier_coverage_25/group_std_mean": 0.09904106110334396, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013342880178242923, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013342880178242923, "signal/frontier_coverage_5/centered_abs_mean": 0.16529696583747863, "signal/frontier_coverage_5/group_std_mean": 0.2307574212551117, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029588155448436737, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029588155448436737, "signal/frontier_ece_reward/centered_abs_mean": 0.01730274744331837, "signal/frontier_ece_reward/group_std_mean": 0.02207809016108513, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002162843430414796, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002162843430414796, "step": 140 }, { "calibration/aurc": 0.18805905954891727, "calibration/batch_distribution_entropy": 0.8317421531074294, "calibration/buffer_distribution_entropy": 0.9134495671386187, "calibration/confidence_entropy": 0.3853728989005753, "calibration/coverage@0%": 0.026336837841754234, "calibration/coverage@1%": 0.026336837841754234, "calibration/coverage@10%": 0.310333026899385, "calibration/coverage@15%": 0.41578143820163777, "calibration/coverage@20%": 0.5445131776015526, "calibration/coverage@25%": 0.7681225346528582, "calibration/coverage@30%": 0.8655409301907436, "calibration/coverage@5%": 0.06604470680494234, "calibration/ece": 0.09805606586818498, "calibration/mean_confidence": 0.6831073168113699, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008767361111111071, "completions/max_length": 3604.8, "completions/max_terminated_length": 3604.8, "completions/mean_length": 1115.1513427734376, "completions/mean_terminated_length": 1125.17373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 308.6, "epoch": 0.34799565005437433, "grad_norm": 0.00028119460330344737, "learning_rate": 1.8975903614457832e-06, "loss": -0.0078, "num_tokens": 439012209.0, "reward": 1.0752248764038086, "reward_std": 0.11887196749448777, "rewards/accuracy_reward": 0.71015625, "rewards/brier_reward": 0.8194115638732911, "rewards/confidence_uniqueness_reward": 0.9335981130599975, "rewards/format_reward": 0.9911458253860473, "rewards/frontier_aurc_reward": -0.0018106767674908042, "rewards/frontier_coverage_1": 0.03475271426141262, "rewards/frontier_coverage_10": 0.03475271426141262, "rewards/frontier_coverage_15": 0.03475271426141262, "rewards/frontier_coverage_20": 0.03500533141195774, "rewards/frontier_coverage_25": 0.06168616786599159, "rewards/frontier_coverage_5": 0.03475271426141262, "rewards/frontier_ece_reward": 0.01008757334202528, "signal/accuracy_reward/centered_abs_mean": 0.13445637822151185, "signal/accuracy_reward/group_std_mean": 0.18606190383434296, "signal/accuracy_reward/group_zero_std_frac": 0.43333333134651186, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06722818911075593, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06722818911075593, "signal/advantage_abs_mean": 0.08313022255897522, "signal/advantage_pre_scale_abs_mean": 0.08313022255897522, "signal/advantage_pre_scale_std": 0.15367571711540223, "signal/advantage_std": 0.15367571711540223, "signal/brier_reward/centered_abs_mean": 0.13741165697574614, "signal/brier_reward/group_std_mean": 0.1829265683889389, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017176457121968268, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017176457121968268, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03806578069925308, "signal/confidence_uniqueness_reward/group_std_mean": 0.06106965392827988, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004758222587406635, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004758222587406635, "signal/format_reward/centered_abs_mean": 0.015831163432449103, "signal/format_reward/group_std_mean": 0.03331194259226322, "signal/format_reward/group_zero_std_frac": 0.850000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007915581716224552, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007915581716224552, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020372898085042833, "signal/frontier_aurc_reward/group_std_mean": 0.0033891588915139436, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.646748591563664e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.646748591563664e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12970810532569885, "signal/frontier_coverage_1/group_std_mean": 0.18752795159816743, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002321774885058403, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002321774885058403, "signal/frontier_coverage_10/centered_abs_mean": 0.12970810532569885, "signal/frontier_coverage_10/group_std_mean": 0.18752795159816743, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002321774885058403, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002321774885058403, "signal/frontier_coverage_15/centered_abs_mean": 0.12970810532569885, "signal/frontier_coverage_15/group_std_mean": 0.18752795159816743, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002321774885058403, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002321774885058403, "signal/frontier_coverage_20/centered_abs_mean": 0.10424077808856964, "signal/frontier_coverage_20/group_std_mean": 0.15195895731449127, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018659099237993359, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018659099237993359, "signal/frontier_coverage_25/centered_abs_mean": 0.06490491330623627, "signal/frontier_coverage_25/group_std_mean": 0.08455234318971634, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011617979034781456, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011617979034781456, "signal/frontier_coverage_5/centered_abs_mean": 0.12970810532569885, "signal/frontier_coverage_5/group_std_mean": 0.18752795159816743, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002321774885058403, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002321774885058403, "signal/frontier_ece_reward/centered_abs_mean": 0.014038374833762645, "signal/frontier_ece_reward/group_std_mean": 0.01795310601592064, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017547968542203306, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017547968542203306, "step": 145 }, { "calibration/aurc": 0.16444658049930577, "calibration/batch_distribution_entropy": 0.8554792996980854, "calibration/buffer_distribution_entropy": 0.9121123999463061, "calibration/confidence_entropy": 0.4054611707906853, "calibration/coverage@0%": 0.10798269230769231, "calibration/coverage@1%": 0.12177579575596817, "calibration/coverage@10%": 0.43126122905874886, "calibration/coverage@15%": 0.5021995007429834, "calibration/coverage@20%": 0.6263020287958115, "calibration/coverage@25%": 0.6916137652705061, "calibration/coverage@30%": 0.7823606457242582, "calibration/coverage@5%": 0.30052938770999116, "calibration/ece": 0.14277405261871456, "calibration/mean_confidence": 0.6701547841980939, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009461805555555558, "completions/max_length": 3815.6, "completions/max_terminated_length": 3815.6, "completions/mean_length": 1194.10302734375, "completions/mean_terminated_length": 1205.4841552734374, "completions/min_length": 0.0, "completions/min_terminated_length": 301.4, "epoch": 0.3599955000562493, "grad_norm": 0.00032394277513958514, "learning_rate": 1.7469879518072292e-06, "loss": -0.0072, "num_tokens": 455878612.0, "reward": 1.0641360998153686, "reward_std": 0.12625986337661743, "rewards/accuracy_reward": 0.688368046283722, "rewards/brier_reward": 0.8141419291496277, "rewards/confidence_uniqueness_reward": 0.9379014372825623, "rewards/format_reward": 0.9903645753860474, "rewards/frontier_aurc_reward": -0.0015774117084220052, "rewards/frontier_coverage_1": 0.040133790113031864, "rewards/frontier_coverage_10": 0.040133790113031864, "rewards/frontier_coverage_15": 0.03999885078519583, "rewards/frontier_coverage_20": 0.037344107404351234, "rewards/frontier_coverage_25": 0.06747718080878258, "rewards/frontier_coverage_5": 0.040133790113031864, "rewards/frontier_ece_reward": 0.008361095190048217, "signal/accuracy_reward/centered_abs_mean": 0.1630425363779068, "signal/accuracy_reward/group_std_mean": 0.2126096099615097, "signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0815212681889534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0815212681889534, "signal/advantage_abs_mean": 0.09208865314722062, "signal/advantage_pre_scale_abs_mean": 0.09208865314722062, "signal/advantage_pre_scale_std": 0.1610693395137787, "signal/advantage_std": 0.1610693395137787, "signal/brier_reward/centered_abs_mean": 0.14592998921871186, "signal/brier_reward/group_std_mean": 0.19062794744968414, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018241248652338983, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018241248652338983, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03703403770923615, "signal/confidence_uniqueness_reward/group_std_mean": 0.056619017571210864, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0046292547136545185, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0046292547136545185, "signal/format_reward/centered_abs_mean": 0.015988498367369174, "signal/format_reward/group_std_mean": 0.030791251361370085, "signal/format_reward/group_zero_std_frac": 0.8722222328186036, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007994249183684587, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007994249183684587, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018028806429356337, "signal/frontier_aurc_reward/group_std_mean": 0.0029539034236222505, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.227156230423134e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.227156230423134e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15507982075214385, "signal/frontier_coverage_1/group_std_mean": 0.2211749255657196, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027759287506341932, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027759287506341932, "signal/frontier_coverage_10/centered_abs_mean": 0.15507982075214385, "signal/frontier_coverage_10/group_std_mean": 0.2211749255657196, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027759287506341932, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027759287506341932, "signal/frontier_coverage_15/centered_abs_mean": 0.15409548580646515, "signal/frontier_coverage_15/group_std_mean": 0.21988695561885835, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002758309058845043, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002758309058845043, "signal/frontier_coverage_20/centered_abs_mean": 0.10383160263299943, "signal/frontier_coverage_20/group_std_mean": 0.15046164393424988, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018585855141282082, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018585855141282082, "signal/frontier_coverage_25/centered_abs_mean": 0.06960556581616402, "signal/frontier_coverage_25/group_std_mean": 0.08995560258626938, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012459396151825787, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012459396151825787, "signal/frontier_coverage_5/centered_abs_mean": 0.15507982075214385, "signal/frontier_coverage_5/group_std_mean": 0.2211749255657196, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027759287506341932, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027759287506341932, "signal/frontier_ece_reward/centered_abs_mean": 0.013568481430411339, "signal/frontier_ece_reward/group_std_mean": 0.017983463406562806, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016960601788014174, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016960601788014174, "step": 150 }, { "epoch": 0.3599955000562493, "eval_calibration/aurc": 0.12589744753156493, "eval_calibration/batch_distribution_entropy": 0.792195222040961, "eval_calibration/buffer_distribution_entropy": 0.9098214718589893, "eval_calibration/confidence_entropy": 0.42053250993043306, "eval_calibration/coverage@0%": 0.2567204301075269, "eval_calibration/coverage@1%": 0.2567204301075269, "eval_calibration/coverage@10%": 0.43531586021505375, "eval_calibration/coverage@15%": 0.5924059139784946, "eval_calibration/coverage@20%": 0.8136760752688174, "eval_calibration/coverage@25%": 0.9420362903225806, "eval_calibration/coverage@30%": 0.9895833333333334, "eval_calibration/coverage@5%": 0.29838709677419356, "eval_calibration/ece": 0.14721396114449362, "eval_calibration/mean_confidence": 0.6862037135866892, "eval_completions/clipped_ratio": 0.006076388888888895, "eval_completions/max_length": 2771.8333333333335, "eval_completions/max_terminated_length": 2771.8333333333335, "eval_completions/mean_length": 1130.8981323242188, "eval_completions/mean_terminated_length": 1137.8710530598958, "eval_completions/min_length": 159.83333333333334, "eval_completions/min_terminated_length": 384.8333333333333, "eval_loss": 0.0, "eval_num_tokens": 455878612.0, "eval_reward": 1.06011829773585, "eval_reward_std": 0.245634155968825, "eval_rewards/accuracy_reward": 0.6901041666666666, "eval_rewards/brier_reward": 0.8112742304801941, "eval_rewards/confidence_uniqueness_reward": 0.887604296207428, "eval_rewards/format_reward": 0.9939236144224802, "eval_rewards/frontier_aurc_reward": -0.001700426151122277, "eval_rewards/frontier_coverage_1": 0.0397941037081182, "eval_rewards/frontier_coverage_10": 0.0397941037081182, "eval_rewards/frontier_coverage_15": 0.039953491340080895, "eval_rewards/frontier_coverage_20": 0.0386975952424109, "eval_rewards/frontier_coverage_25": 0.0703319435318311, "eval_rewards/frontier_coverage_5": 0.0397941037081182, "eval_rewards/frontier_ece_reward": 0.007770234486088157, "eval_runtime": 194.5895, "eval_samples_per_second": 5.139, "eval_signal/accuracy_reward/centered_abs_mean": 0.4119466145833333, "eval_signal/accuracy_reward/group_std_mean": 0.45975885291894275, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20597330729166666, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20597330729166666, "eval_signal/advantage_abs_mean": 0.21174462139606476, "eval_signal/advantage_pre_scale_abs_mean": 0.21174462139606476, "eval_signal/advantage_pre_scale_std": 0.24418220420678458, "eval_signal/advantage_std": 0.24418220420678458, "eval_signal/brier_reward/centered_abs_mean": 0.21792598068714142, "eval_signal/brier_reward/group_std_mean": 0.2779506991306941, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027240747585892677, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.027240747585892677, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05039315981169542, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07548397406935692, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006299144976461927, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006299144976461927, "eval_signal/format_reward/centered_abs_mean": 0.01177300326526165, "eval_signal/format_reward/group_std_mean": 0.034373246133327484, "eval_signal/format_reward/group_zero_std_frac": 0.8055555721124014, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.005886501632630825, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.005886501632630825, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002665710848911355, "eval_signal/frontier_aurc_reward/group_std_mean": 0.004803995058561365, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7716222676778365e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7716222676778365e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.20755265404780707, "eval_signal/frontier_coverage_1/group_std_mean": 0.33392194906870526, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037151926274721823, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037151926274721823, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.20755265404780707, "eval_signal/frontier_coverage_10/group_std_mean": 0.33392194906870526, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037151926274721823, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037151926274721823, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.20377135028441748, "eval_signal/frontier_coverage_15/group_std_mean": 0.3286268611749013, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003647507051937282, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003647507051937282, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.12508864452441534, "eval_signal/frontier_coverage_20/group_std_mean": 0.2087546760837237, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002239086684615662, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002239086684615662, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.09835883850852649, "eval_signal/frontier_coverage_25/group_std_mean": 0.12412550052007039, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017606231267563999, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017606231267563999, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.20755265404780707, "eval_signal/frontier_coverage_5/group_std_mean": 0.33392194906870526, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037151926274721823, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037151926274721823, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.01478797197341919, "eval_signal/frontier_ece_reward/group_std_mean": 0.02143588351706664, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018484964966773987, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018484964966773987, "eval_steps_per_second": 0.031, "step": 150 }, { "calibration/aurc": 0.1592934062862865, "calibration/batch_distribution_entropy": 0.8200635510914287, "calibration/buffer_distribution_entropy": 0.9056196886811655, "calibration/confidence_entropy": 0.4273629669999046, "calibration/coverage@0%": 0.05090042510789072, "calibration/coverage@1%": 0.07300568826578545, "calibration/coverage@10%": 0.4090462757088025, "calibration/coverage@15%": 0.49287078530957606, "calibration/coverage@20%": 0.7218659605131165, "calibration/coverage@25%": 0.7721199035330008, "calibration/coverage@30%": 0.8353387467191601, "calibration/coverage@5%": 0.25320305668683807, "calibration/ece": 0.11551180587161887, "calibration/mean_confidence": 0.7259954579627551, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00746527777777779, "completions/max_length": 3773.0, "completions/max_terminated_length": 3773.0, "completions/mean_length": 1111.4385498046875, "completions/mean_terminated_length": 1119.8486083984376, "completions/min_length": 0.0, "completions/min_terminated_length": 335.0, "epoch": 0.3719953500581243, "grad_norm": 0.0003368077159393579, "learning_rate": 1.5963855421686747e-06, "loss": -0.0063, "num_tokens": 471790096.0, "reward": 1.0929233312606812, "reward_std": 0.12424634695053101, "rewards/accuracy_reward": 0.7425347208976746, "rewards/brier_reward": 0.8317407846450806, "rewards/confidence_uniqueness_reward": 0.9393562436103821, "rewards/format_reward": 0.9925347089767456, "rewards/frontier_aurc_reward": -0.0012545568635687232, "rewards/frontier_coverage_1": 0.017057520151138306, "rewards/frontier_coverage_10": 0.017057520151138306, "rewards/frontier_coverage_15": 0.017654052283614875, "rewards/frontier_coverage_20": 0.026030075177550314, "rewards/frontier_coverage_25": 0.08343757688999176, "rewards/frontier_coverage_5": 0.017057520151138306, "rewards/frontier_ece_reward": 0.0066596436314284805, "signal/accuracy_reward/centered_abs_mean": 0.15950520634651183, "signal/accuracy_reward/group_std_mean": 0.21293214857578277, "signal/accuracy_reward/group_zero_std_frac": 0.3777777820825577, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07975260317325591, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07975260317325591, "signal/advantage_abs_mean": 0.08897002339363098, "signal/advantage_pre_scale_abs_mean": 0.08897002339363098, "signal/advantage_pre_scale_std": 0.1588042050600052, "signal/advantage_std": 0.1588042050600052, "signal/brier_reward/centered_abs_mean": 0.1306125193834305, "signal/brier_reward/group_std_mean": 0.17341192066669464, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01632656492292881, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01632656492292881, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.034348542988300326, "signal/confidence_uniqueness_reward/group_std_mean": 0.05231625810265541, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004293567873537541, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004293567873537541, "signal/format_reward/centered_abs_mean": 0.013487413339316846, "signal/format_reward/group_std_mean": 0.027320950850844385, "signal/format_reward/group_zero_std_frac": 0.8833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006743706669658423, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006743706669658423, "signal/frontier_aurc_reward/centered_abs_mean": 0.001531174755655229, "signal/frontier_aurc_reward/group_std_mean": 0.002751293499022722, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7408025925979018e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7408025925979018e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13883511126041412, "signal/frontier_coverage_1/group_std_mean": 0.1974550575017929, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024851484689861537, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024851484689861537, "signal/frontier_coverage_10/centered_abs_mean": 0.13883511126041412, "signal/frontier_coverage_10/group_std_mean": 0.1974550575017929, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024851484689861537, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024851484689861537, "signal/frontier_coverage_15/centered_abs_mean": 0.13508277088403703, "signal/frontier_coverage_15/group_std_mean": 0.19253535866737365, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024179814849048854, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024179814849048854, "signal/frontier_coverage_20/centered_abs_mean": 0.07531605064868926, "signal/frontier_coverage_20/group_std_mean": 0.1094200387597084, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013481572968885303, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013481572968885303, "signal/frontier_coverage_25/centered_abs_mean": 0.06770254969596863, "signal/frontier_coverage_25/group_std_mean": 0.08678248971700668, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001211875630542636, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001211875630542636, "signal/frontier_coverage_5/centered_abs_mean": 0.13883511126041412, "signal/frontier_coverage_5/group_std_mean": 0.1974550575017929, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024851484689861537, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024851484689861537, "signal/frontier_ece_reward/centered_abs_mean": 0.01135763879865408, "signal/frontier_ece_reward/group_std_mean": 0.014954530447721482, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00141970484983176, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00141970484983176, "step": 155 }, { "calibration/aurc": 0.16224591980290784, "calibration/batch_distribution_entropy": 0.7755681714330434, "calibration/buffer_distribution_entropy": 0.8944977396614957, "calibration/confidence_entropy": 0.40641718225845425, "calibration/coverage@0%": 0.021498948757059815, "calibration/coverage@1%": 0.021498948757059815, "calibration/coverage@10%": 0.4811618811064847, "calibration/coverage@15%": 0.7439212993500158, "calibration/coverage@20%": 0.7811449615918429, "calibration/coverage@25%": 0.7979112558574157, "calibration/coverage@30%": 0.8020997375328083, "calibration/coverage@5%": 0.1755658160530981, "calibration/ece": 0.10178569662618395, "calibration/mean_confidence": 0.7464084729023905, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012673611111111139, "completions/max_length": 3875.6, "completions/max_terminated_length": 3875.6, "completions/mean_length": 1103.394287109375, "completions/mean_terminated_length": 1117.539599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 307.0, "epoch": 0.38399520005999926, "grad_norm": 0.00029608793556690216, "learning_rate": 1.4457831325301204e-06, "loss": -0.0106, "num_tokens": 487588494.0, "reward": 1.053929877281189, "reward_std": 0.12981161773204802, "rewards/accuracy_reward": 0.6743055582046509, "rewards/brier_reward": 0.8059103727340698, "rewards/confidence_uniqueness_reward": 0.9313126802444458, "rewards/format_reward": 0.9873263835906982, "rewards/frontier_aurc_reward": -0.0018599932780489325, "rewards/frontier_coverage_1": 0.04324149824678898, "rewards/frontier_coverage_10": 0.04324149824678898, "rewards/frontier_coverage_15": 0.042741596698760986, "rewards/frontier_coverage_20": 0.035604484006762506, "rewards/frontier_coverage_25": 0.07923954874277114, "rewards/frontier_coverage_5": 0.04324149824678898, "rewards/frontier_ece_reward": 0.006811666022986174, "signal/accuracy_reward/centered_abs_mean": 0.1576822891831398, "signal/accuracy_reward/group_std_mean": 0.2048851728439331, "signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0788411445915699, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0788411445915699, "signal/advantage_abs_mean": 0.09569884538650512, "signal/advantage_pre_scale_abs_mean": 0.09569884538650512, "signal/advantage_pre_scale_std": 0.1690044015645981, "signal/advantage_std": 0.1690044015645981, "signal/brier_reward/centered_abs_mean": 0.14289563298225402, "signal/brier_reward/group_std_mean": 0.18329527378082275, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017861954122781753, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017861954122781753, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.042262401431798935, "signal/confidence_uniqueness_reward/group_std_mean": 0.06386898383498192, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005282800178974867, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005282800178974867, "signal/format_reward/centered_abs_mean": 0.02110460065305233, "signal/format_reward/group_std_mean": 0.038827139884233475, "signal/format_reward/group_zero_std_frac": 0.8416666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010552300326526164, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010552300326526164, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020189554430544376, "signal/frontier_aurc_reward/group_std_mean": 0.003409457951784134, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.613930239225738e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.613930239225738e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14031126350164413, "signal/frontier_coverage_1/group_std_mean": 0.19471052289009094, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002511571627110243, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002511571627110243, "signal/frontier_coverage_10/centered_abs_mean": 0.14031126350164413, "signal/frontier_coverage_10/group_std_mean": 0.19471052289009094, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002511571627110243, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002511571627110243, "signal/frontier_coverage_15/centered_abs_mean": 0.13545251190662383, "signal/frontier_coverage_15/group_std_mean": 0.1884155750274658, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024245998822152613, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024245998822152613, "signal/frontier_coverage_20/centered_abs_mean": 0.07455982491374016, "signal/frontier_coverage_20/group_std_mean": 0.10475812703371049, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013346209190785885, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013346209190785885, "signal/frontier_coverage_25/centered_abs_mean": 0.07323736399412155, "signal/frontier_coverage_25/group_std_mean": 0.09230298697948455, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013109487714245915, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013109487714245915, "signal/frontier_coverage_5/centered_abs_mean": 0.14031126350164413, "signal/frontier_coverage_5/group_std_mean": 0.19471052289009094, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002511571627110243, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002511571627110243, "signal/frontier_ece_reward/centered_abs_mean": 0.010905621573328972, "signal/frontier_ece_reward/group_std_mean": 0.01399848610162735, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013632026966661215, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013632026966661215, "step": 160 }, { "calibration/aurc": 0.18802935979054125, "calibration/batch_distribution_entropy": 0.8066304456688098, "calibration/buffer_distribution_entropy": 0.8820289058028152, "calibration/confidence_entropy": 0.39912895218336397, "calibration/coverage@0%": 0.01574846717219552, "calibration/coverage@1%": 0.01574846717219552, "calibration/coverage@10%": 0.23960081124989277, "calibration/coverage@15%": 0.5227776790286016, "calibration/coverage@20%": 0.6225712078939487, "calibration/coverage@25%": 0.6983694035928395, "calibration/coverage@30%": 0.8085135795463503, "calibration/coverage@5%": 0.04206425664587973, "calibration/ece": 0.1298895751168595, "calibration/mean_confidence": 0.7157046755256667, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012847222222222232, "completions/max_length": 3910.0, "completions/max_terminated_length": 3910.0, "completions/mean_length": 1123.602978515625, "completions/mean_terminated_length": 1138.41201171875, "completions/min_length": 0.0, "completions/min_terminated_length": 332.8, "epoch": 0.39599505006187424, "grad_norm": 0.00032429303973913193, "learning_rate": 1.2951807228915664e-06, "loss": -0.0107, "num_tokens": 503671472.0, "reward": 1.053611421585083, "reward_std": 0.12947496324777602, "rewards/accuracy_reward": 0.6701388955116272, "rewards/brier_reward": 0.8113029956817627, "rewards/confidence_uniqueness_reward": 0.9313131213188172, "rewards/format_reward": 0.986718761920929, "rewards/frontier_aurc_reward": -0.0019012225093320012, "rewards/frontier_coverage_1": 0.0559473067522049, "rewards/frontier_coverage_10": 0.0559473067522049, "rewards/frontier_coverage_15": 0.05543726235628128, "rewards/frontier_coverage_20": 0.044922591745853425, "rewards/frontier_coverage_25": 0.0935636967420578, "rewards/frontier_coverage_5": 0.0559473067522049, "rewards/frontier_ece_reward": 0.007312025129795075, "signal/accuracy_reward/centered_abs_mean": 0.14447699785232543, "signal/accuracy_reward/group_std_mean": 0.19804940819740297, "signal/accuracy_reward/group_zero_std_frac": 0.4055555522441864, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07223849892616271, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07223849892616271, "signal/advantage_abs_mean": 0.09204500317573547, "signal/advantage_pre_scale_abs_mean": 0.09204500317573547, "signal/advantage_pre_scale_std": 0.16653842926025392, "signal/advantage_std": 0.16653842926025392, "signal/brier_reward/centered_abs_mean": 0.14590578377246857, "signal/brier_reward/group_std_mean": 0.1900397479534149, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01823822297155857, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01823822297155857, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04235764890909195, "signal/confidence_uniqueness_reward/group_std_mean": 0.06415605992078781, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005294706113636494, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005294706113636494, "signal/format_reward/centered_abs_mean": 0.022314453125, "signal/format_reward/group_std_mean": 0.04004841782152653, "signal/format_reward/group_zero_std_frac": 0.8416666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0111572265625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0111572265625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023163022473454476, "signal/frontier_aurc_reward/group_std_mean": 0.00396724371239543, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1461809087195436e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1461809087195436e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14388979077339173, "signal/frontier_coverage_1/group_std_mean": 0.2028069317340851, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002575627202168107, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002575627202168107, "signal/frontier_coverage_10/centered_abs_mean": 0.14388979077339173, "signal/frontier_coverage_10/group_std_mean": 0.2028069317340851, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002575627202168107, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002575627202168107, "signal/frontier_coverage_15/centered_abs_mean": 0.13867741376161574, "signal/frontier_coverage_15/group_std_mean": 0.19579787254333497, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024823257233947517, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024823257233947517, "signal/frontier_coverage_20/centered_abs_mean": 0.07798126637935639, "signal/frontier_coverage_20/group_std_mean": 0.10919245183467866, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013958646217361093, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013958646217361093, "signal/frontier_coverage_25/centered_abs_mean": 0.07589124590158462, "signal/frontier_coverage_25/group_std_mean": 0.09610055834054947, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013584532076492905, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013584532076492905, "signal/frontier_coverage_5/centered_abs_mean": 0.14388979077339173, "signal/frontier_coverage_5/group_std_mean": 0.2028069317340851, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002575627202168107, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002575627202168107, "signal/frontier_ece_reward/centered_abs_mean": 0.010885684378445148, "signal/frontier_ece_reward/group_std_mean": 0.013944818638265133, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013607105473056435, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013607105473056435, "step": 165 }, { "calibration/aurc": 0.14454796269342324, "calibration/batch_distribution_entropy": 0.7588273654201286, "calibration/buffer_distribution_entropy": 0.8704741506910846, "calibration/confidence_entropy": 0.36874330606179995, "calibration/coverage@0%": 0.009399541884816754, "calibration/coverage@1%": 0.009399541884816754, "calibration/coverage@10%": 0.32980529835742545, "calibration/coverage@15%": 0.6194528392530796, "calibration/coverage@20%": 0.8288237113841952, "calibration/coverage@25%": 0.919738063236047, "calibration/coverage@30%": 0.9756613756613757, "calibration/coverage@5%": 0.0987727704833249, "calibration/ece": 0.09647885897278856, "calibration/mean_confidence": 0.7515438098948212, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011111111111111117, "completions/max_length": 3841.0, "completions/max_terminated_length": 3841.0, "completions/mean_length": 1090.309814453125, "completions/mean_terminated_length": 1102.6180908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 284.6, "epoch": 0.4079949000637492, "grad_norm": 0.0003373819636180997, "learning_rate": 1.1445783132530121e-06, "loss": -0.0087, "num_tokens": 519321025.0, "reward": 1.0796474456787108, "reward_std": 0.12459334284067154, "rewards/accuracy_reward": 0.7208333492279053, "rewards/brier_reward": 0.8231194734573364, "rewards/confidence_uniqueness_reward": 0.9316921472549439, "rewards/format_reward": 0.9887152791023255, "rewards/frontier_aurc_reward": -0.001801234926097095, "rewards/frontier_coverage_1": 0.029103067331016065, "rewards/frontier_coverage_10": 0.029103067331016065, "rewards/frontier_coverage_15": 0.02983991215005517, "rewards/frontier_coverage_20": 0.036224594712257384, "rewards/frontier_coverage_25": 0.11188144534826279, "rewards/frontier_coverage_5": 0.029103067331016065, "rewards/frontier_ece_reward": 0.006446403171867132, "signal/accuracy_reward/centered_abs_mean": 0.14637586772441863, "signal/accuracy_reward/group_std_mean": 0.1990185409784317, "signal/accuracy_reward/group_zero_std_frac": 0.41666666865348817, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07318793386220931, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07318793386220931, "signal/advantage_abs_mean": 0.08974390029907227, "signal/advantage_pre_scale_abs_mean": 0.08974390029907227, "signal/advantage_pre_scale_std": 0.16255992650985718, "signal/advantage_std": 0.16255992650985718, "signal/brier_reward/centered_abs_mean": 0.13866532742977142, "signal/brier_reward/group_std_mean": 0.18006704449653627, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017333165928721427, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017333165928721427, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.040852177888154984, "signal/confidence_uniqueness_reward/group_std_mean": 0.059870512783527376, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005106522236019373, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005106522236019373, "signal/format_reward/centered_abs_mean": 0.018869357742369176, "signal/format_reward/group_std_mean": 0.033280248194932936, "signal/format_reward/group_zero_std_frac": 0.8694444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009434678871184588, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009434678871184588, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022833514027297496, "signal/frontier_aurc_reward/group_std_mean": 0.00370091856457293, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.087198940396775e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.087198940396775e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1318504437804222, "signal/frontier_coverage_1/group_std_mean": 0.18823845088481903, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023601229302585127, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023601229302585127, "signal/frontier_coverage_10/centered_abs_mean": 0.1318504437804222, "signal/frontier_coverage_10/group_std_mean": 0.18823845088481903, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023601229302585127, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023601229302585127, "signal/frontier_coverage_15/centered_abs_mean": 0.1259125664830208, "signal/frontier_coverage_15/group_std_mean": 0.18023517727851868, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002253834856674075, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002253834856674075, "signal/frontier_coverage_20/centered_abs_mean": 0.06886096596717835, "signal/frontier_coverage_20/group_std_mean": 0.09653576016426087, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012326112482696772, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012326112482696772, "signal/frontier_coverage_25/centered_abs_mean": 0.08277640789747238, "signal/frontier_coverage_25/group_std_mean": 0.10427123010158539, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014816976618021727, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014816976618021727, "signal/frontier_coverage_5/centered_abs_mean": 0.1318504437804222, "signal/frontier_coverage_5/group_std_mean": 0.18823845088481903, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023601229302585127, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023601229302585127, "signal/frontier_ece_reward/centered_abs_mean": 0.010013974830508232, "signal/frontier_ece_reward/group_std_mean": 0.012912089005112648, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001251746853813529, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001251746853813529, "step": 170 }, { "calibration/aurc": 0.13834606680918402, "calibration/batch_distribution_entropy": 0.8186999997361368, "calibration/buffer_distribution_entropy": 0.8639091261459949, "calibration/confidence_entropy": 0.3837927440740244, "calibration/coverage@0%": 0.031790703050268966, "calibration/coverage@1%": 0.031790703050268966, "calibration/coverage@10%": 0.39925962188917974, "calibration/coverage@15%": 0.6025120475540564, "calibration/coverage@20%": 0.8262619502896491, "calibration/coverage@25%": 0.9115204932253234, "calibration/coverage@30%": 0.9701405779774911, "calibration/coverage@5%": 0.1683305428786683, "calibration/ece": 0.08496535814356126, "calibration/mean_confidence": 0.7056412285906399, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012760416666666673, "completions/max_length": 3912.8, "completions/max_terminated_length": 3912.8, "completions/mean_length": 1137.7668701171874, "completions/mean_terminated_length": 1152.48974609375, "completions/min_length": 0.0, "completions/min_terminated_length": 314.2, "epoch": 0.4199947500656242, "grad_norm": 0.00032794420258142054, "learning_rate": 9.93975903614458e-07, "loss": -0.0129, "num_tokens": 535536067.0, "reward": 1.0705443382263184, "reward_std": 0.13408834338188172, "rewards/accuracy_reward": 0.7054687380790711, "rewards/brier_reward": 0.8158458828926086, "rewards/confidence_uniqueness_reward": 0.9295023679733276, "rewards/format_reward": 0.9870659708976746, "rewards/frontier_aurc_reward": -0.0016766191460192204, "rewards/frontier_coverage_1": 0.03425406012684107, "rewards/frontier_coverage_10": 0.03425406012684107, "rewards/frontier_coverage_15": 0.03412420265376568, "rewards/frontier_coverage_20": 0.039517082273960114, "rewards/frontier_coverage_25": 0.12406121045351029, "rewards/frontier_coverage_5": 0.03425406012684107, "rewards/frontier_ece_reward": 0.0060807295143604275, "signal/accuracy_reward/centered_abs_mean": 0.15820854902267456, "signal/accuracy_reward/group_std_mean": 0.21389053761959076, "signal/accuracy_reward/group_zero_std_frac": 0.36666667461395264, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07910427451133728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07910427451133728, "signal/advantage_abs_mean": 0.09520394802093506, "signal/advantage_pre_scale_abs_mean": 0.09520394802093506, "signal/advantage_pre_scale_std": 0.17199627161026002, "signal/advantage_std": 0.17199627161026002, "signal/brier_reward/centered_abs_mean": 0.14463236629962922, "signal/brier_reward/group_std_mean": 0.19018857181072235, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018079045787453653, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018079045787453653, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04297049716114998, "signal/confidence_uniqueness_reward/group_std_mean": 0.066465725004673, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005371312145143747, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005371312145143747, "signal/format_reward/centered_abs_mean": 0.022140841744840146, "signal/format_reward/group_std_mean": 0.04171677567064762, "signal/format_reward/group_zero_std_frac": 0.8277777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011070420872420073, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011070420872420073, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022307957988232374, "signal/frontier_aurc_reward/group_std_mean": 0.0038668690249323845, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9931246283231304e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9931246283231304e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14078425168991088, "signal/frontier_coverage_1/group_std_mean": 0.204370379447937, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002520037954673171, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002520037954673171, "signal/frontier_coverage_10/centered_abs_mean": 0.14078425168991088, "signal/frontier_coverage_10/group_std_mean": 0.204370379447937, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002520037954673171, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002520037954673171, "signal/frontier_coverage_15/centered_abs_mean": 0.13343214988708496, "signal/frontier_coverage_15/group_std_mean": 0.19431246519088746, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002388435346074402, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002388435346074402, "signal/frontier_coverage_20/centered_abs_mean": 0.07233781591057778, "signal/frontier_coverage_20/group_std_mean": 0.10254417657852173, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001294846786186099, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001294846786186099, "signal/frontier_coverage_25/centered_abs_mean": 0.08985466808080673, "signal/frontier_coverage_25/group_std_mean": 0.11489148437976837, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016083985101431608, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016083985101431608, "signal/frontier_coverage_5/centered_abs_mean": 0.14078425168991088, "signal/frontier_coverage_5/group_std_mean": 0.204370379447937, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002520037954673171, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002520037954673171, "signal/frontier_ece_reward/centered_abs_mean": 0.010127259977161884, "signal/frontier_ece_reward/group_std_mean": 0.013339119404554367, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012659074971452355, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012659074971452355, "step": 175 }, { "calibration/aurc": 0.1078750692864375, "calibration/batch_distribution_entropy": 0.7768469934330399, "calibration/buffer_distribution_entropy": 0.8621689599316819, "calibration/confidence_entropy": 0.3939859688531258, "calibration/coverage@0%": 0.06972233539436981, "calibration/coverage@1%": 0.06972233539436981, "calibration/coverage@10%": 0.5525677745061607, "calibration/coverage@15%": 0.7335402018406365, "calibration/coverage@20%": 0.8665403953716891, "calibration/coverage@25%": 0.9559681697612732, "calibration/coverage@30%": 0.9702917771883289, "calibration/coverage@5%": 0.27872282790800273, "calibration/ece": 0.08607349569510638, "calibration/mean_confidence": 0.7374003617662448, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014149305555555537, "completions/max_length": 3998.6, "completions/max_terminated_length": 3998.6, "completions/mean_length": 1096.2947998046875, "completions/mean_terminated_length": 1111.9990234375, "completions/min_length": 0.0, "completions/min_terminated_length": 338.8, "epoch": 0.4319946000674992, "grad_norm": 0.00031256891088560224, "learning_rate": 8.433734939759036e-07, "loss": -0.012, "num_tokens": 551265351.0, "reward": 1.0728761911392213, "reward_std": 0.12901873141527176, "rewards/accuracy_reward": 0.7154513835906983, "rewards/brier_reward": 0.810655677318573, "rewards/confidence_uniqueness_reward": 0.9245447874069214, "rewards/format_reward": 0.9855902791023254, "rewards/frontier_aurc_reward": -0.0020415371283888815, "rewards/frontier_coverage_1": 0.022566875419579448, "rewards/frontier_coverage_10": 0.02261007858905941, "rewards/frontier_coverage_15": 0.02267046067863703, "rewards/frontier_coverage_20": 0.035761307924985886, "rewards/frontier_coverage_25": 0.14290719628334045, "rewards/frontier_coverage_5": 0.022566875419579448, "rewards/frontier_ece_reward": 0.005401916056871414, "signal/accuracy_reward/centered_abs_mean": 0.14884982407093048, "signal/accuracy_reward/group_std_mean": 0.19917932748794556, "signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07442491203546524, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07442491203546524, "signal/advantage_abs_mean": 0.09345675855875016, "signal/advantage_pre_scale_abs_mean": 0.09345675855875016, "signal/advantage_pre_scale_std": 0.16980061829090118, "signal/advantage_std": 0.16980061829090118, "signal/brier_reward/centered_abs_mean": 0.13750295341014862, "signal/brier_reward/group_std_mean": 0.17973470985889434, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017187869176268578, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017187869176268578, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.044607821851968765, "signal/confidence_uniqueness_reward/group_std_mean": 0.06631096750497818, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005575977731496096, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005575977731496096, "signal/format_reward/centered_abs_mean": 0.02207031212747097, "signal/format_reward/group_std_mean": 0.03964213021099568, "signal/format_reward/group_zero_std_frac": 0.8388888835906982, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011035156063735485, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011035156063735485, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025864802300930023, "signal/frontier_aurc_reward/group_std_mean": 0.0044010514859110115, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.629799441318028e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.629799441318028e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12913522273302078, "signal/frontier_coverage_1/group_std_mean": 0.18910902738571167, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023115205112844706, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023115205112844706, "signal/frontier_coverage_10/centered_abs_mean": 0.12895990014076233, "signal/frontier_coverage_10/group_std_mean": 0.18887372612953185, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002308382326737046, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002308382326737046, "signal/frontier_coverage_15/centered_abs_mean": 0.1184653490781784, "signal/frontier_coverage_15/group_std_mean": 0.17463470101356507, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002120529650710523, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002120529650710523, "signal/frontier_coverage_20/centered_abs_mean": 0.06513682901859283, "signal/frontier_coverage_20/group_std_mean": 0.09168828129768372, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011659492505714298, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011659492505714298, "signal/frontier_coverage_25/centered_abs_mean": 0.10031676590442658, "signal/frontier_coverage_25/group_std_mean": 0.1278284102678299, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017956699943169952, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017956699943169952, "signal/frontier_coverage_5/centered_abs_mean": 0.12913522273302078, "signal/frontier_coverage_5/group_std_mean": 0.18910902738571167, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023115205112844706, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023115205112844706, "signal/frontier_ece_reward/centered_abs_mean": 0.009569591842591763, "signal/frontier_ece_reward/group_std_mean": 0.012618933990597724, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011961989803239703, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011961989803239703, "step": 180 }, { "calibration/aurc": 0.1905658711031758, "calibration/batch_distribution_entropy": 0.7511575429186707, "calibration/buffer_distribution_entropy": 0.8596426578480262, "calibration/confidence_entropy": 0.38262879225186686, "calibration/coverage@0%": 0.012010443864229765, "calibration/coverage@1%": 0.012010443864229765, "calibration/coverage@10%": 0.18892950391644908, "calibration/coverage@15%": 0.5163960795428519, "calibration/coverage@20%": 0.7123345333173191, "calibration/coverage@25%": 0.7931292530992373, "calibration/coverage@30%": 0.8088772845953003, "calibration/coverage@5%": 0.09822729146626356, "calibration/ece": 0.10218960656261426, "calibration/mean_confidence": 0.7544263684590896, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012413194444444442, "completions/max_length": 4026.4, "completions/max_terminated_length": 4026.4, "completions/mean_length": 1112.1637451171875, "completions/mean_terminated_length": 1126.181298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 306.6, "epoch": 0.44399445006937416, "grad_norm": 0.0003794998920056969, "learning_rate": 6.927710843373495e-07, "loss": -0.0089, "num_tokens": 567167525.0, "reward": 1.0585483074188233, "reward_std": 0.13896091282367706, "rewards/accuracy_reward": 0.68046875, "rewards/brier_reward": 0.8080840945243836, "rewards/confidence_uniqueness_reward": 0.9286048769950866, "rewards/format_reward": 0.9873263835906982, "rewards/frontier_aurc_reward": -0.002264450001530349, "rewards/frontier_coverage_1": 0.04679740741848946, "rewards/frontier_coverage_10": 0.04667307548224926, "rewards/frontier_coverage_15": 0.045523762702941895, "rewards/frontier_coverage_20": 0.046609895676374434, "rewards/frontier_coverage_25": 0.15252834260463716, "rewards/frontier_coverage_5": 0.04679740741848946, "rewards/frontier_ece_reward": 0.005719311535358429, "signal/accuracy_reward/centered_abs_mean": 0.16688910722732545, "signal/accuracy_reward/group_std_mean": 0.2191845268011093, "signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08344455361366272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08344455361366272, "signal/advantage_abs_mean": 0.10244468003511428, "signal/advantage_pre_scale_abs_mean": 0.10244468003511428, "signal/advantage_pre_scale_std": 0.17401386499404908, "signal/advantage_std": 0.17401386499404908, "signal/brier_reward/centered_abs_mean": 0.1530242681503296, "signal/brier_reward/group_std_mean": 0.19397515952587127, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0191280335187912, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0191280335187912, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04111187309026718, "signal/confidence_uniqueness_reward/group_std_mean": 0.0640983261168003, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005138984136283398, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005138984136283398, "signal/format_reward/centered_abs_mean": 0.02018229141831398, "signal/format_reward/group_std_mean": 0.03944449722766876, "signal/format_reward/group_zero_std_frac": 0.830555546283722, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01009114570915699, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01009114570915699, "signal/frontier_aurc_reward/centered_abs_mean": 0.002950982470065355, "signal/frontier_aurc_reward/group_std_mean": 0.004983780579641462, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.282258280203678e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.282258280203678e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14425066113471985, "signal/frontier_coverage_1/group_std_mean": 0.20719496309757232, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025820867624133824, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025820867624133824, "signal/frontier_coverage_10/centered_abs_mean": 0.14380578100681304, "signal/frontier_coverage_10/group_std_mean": 0.20660340785980225, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002574123442173004, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002574123442173004, "signal/frontier_coverage_15/centered_abs_mean": 0.12744852900505066, "signal/frontier_coverage_15/group_std_mean": 0.18429652452468873, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022813286632299423, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022813286632299423, "signal/frontier_coverage_20/centered_abs_mean": 0.07100907564163209, "signal/frontier_coverage_20/group_std_mean": 0.0970319539308548, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001271062414161861, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001271062414161861, "signal/frontier_coverage_25/centered_abs_mean": 0.11521650403738022, "signal/frontier_coverage_25/group_std_mean": 0.1455530434846878, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020623753778636457, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020623753778636457, "signal/frontier_coverage_5/centered_abs_mean": 0.14425066113471985, "signal/frontier_coverage_5/group_std_mean": 0.20719496309757232, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025820867624133824, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025820867624133824, "signal/frontier_ece_reward/centered_abs_mean": 0.00979881975799799, "signal/frontier_ece_reward/group_std_mean": 0.012976471707224846, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012248524697497487, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012248524697497487, "step": 185 }, { "calibration/aurc": 0.16030641773857301, "calibration/batch_distribution_entropy": 0.7187946309257993, "calibration/buffer_distribution_entropy": 0.8544406875935249, "calibration/confidence_entropy": 0.35407681338799846, "calibration/coverage@0%": 0.06611951878057401, "calibration/coverage@1%": 0.06611951878057401, "calibration/coverage@10%": 0.3717757925491453, "calibration/coverage@15%": 0.43330493174863155, "calibration/coverage@20%": 0.724023008873238, "calibration/coverage@25%": 0.8848319190600522, "calibration/coverage@30%": 0.9640407419495214, "calibration/coverage@5%": 0.19347886064294592, "calibration/ece": 0.10143865496049813, "calibration/mean_confidence": 0.7697417873464006, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009114583333333304, "completions/max_length": 3704.2, "completions/max_terminated_length": 3704.2, "completions/mean_length": 1097.49306640625, "completions/mean_terminated_length": 1107.6218994140625, "completions/min_length": 0.0, "completions/min_terminated_length": 304.2, "epoch": 0.45599430007124914, "grad_norm": 0.00037333081127144396, "learning_rate": 5.421686746987952e-07, "loss": -0.0071, "num_tokens": 582893589.0, "reward": 1.0882224082946776, "reward_std": 0.12903861999511718, "rewards/accuracy_reward": 0.7315972208976745, "rewards/brier_reward": 0.8292945265769959, "rewards/confidence_uniqueness_reward": 0.9284190654754638, "rewards/format_reward": 0.9907986044883728, "rewards/frontier_aurc_reward": -0.0020672645885497333, "rewards/frontier_coverage_1": 0.03171844305470586, "rewards/frontier_coverage_10": 0.03166983062401414, "rewards/frontier_coverage_15": 0.03243658747524023, "rewards/frontier_coverage_20": 0.04914712235331535, "rewards/frontier_coverage_25": 0.19652676284313203, "rewards/frontier_coverage_5": 0.03171844305470586, "rewards/frontier_ece_reward": 0.005333344265818596, "signal/accuracy_reward/centered_abs_mean": 0.16028645932674407, "signal/accuracy_reward/group_std_mean": 0.21160895824432374, "signal/accuracy_reward/group_zero_std_frac": 0.3944444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08014322966337203, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08014322966337203, "signal/advantage_abs_mean": 0.09539956450462342, "signal/advantage_pre_scale_abs_mean": 0.09539956450462342, "signal/advantage_pre_scale_std": 0.16672308146953582, "signal/advantage_std": 0.16672308146953582, "signal/brier_reward/centered_abs_mean": 0.13682476282119752, "signal/brier_reward/group_std_mean": 0.18165784180164338, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01710309535264969, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01710309535264969, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03870115876197815, "signal/confidence_uniqueness_reward/group_std_mean": 0.056563211232423784, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004837644845247268, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004837644845247268, "signal/format_reward/centered_abs_mean": 0.015277777425944805, "signal/format_reward/group_std_mean": 0.02817566618323326, "signal/format_reward/group_zero_std_frac": 0.8833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007638888712972402, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007638888712972402, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026700781658291818, "signal/frontier_aurc_reward/group_std_mean": 0.004712453950196505, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.779439841513522e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.779439841513522e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13049551248550414, "signal/frontier_coverage_1/group_std_mean": 0.19274420142173768, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002335869614034891, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002335869614034891, "signal/frontier_coverage_10/centered_abs_mean": 0.12981376349925994, "signal/frontier_coverage_10/group_std_mean": 0.191838139295578, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002323666214942932, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002323666214942932, "signal/frontier_coverage_15/centered_abs_mean": 0.11072092205286026, "signal/frontier_coverage_15/group_std_mean": 0.16516720950603486, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019819044275209306, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019819044275209306, "signal/frontier_coverage_20/centered_abs_mean": 0.06475888639688492, "signal/frontier_coverage_20/group_std_mean": 0.08885233402252198, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011591840535402297, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011591840535402297, "signal/frontier_coverage_25/centered_abs_mean": 0.12515371143817902, "signal/frontier_coverage_25/group_std_mean": 0.15882596969604493, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002240251423791051, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002240251423791051, "signal/frontier_coverage_5/centered_abs_mean": 0.13049551248550414, "signal/frontier_coverage_5/group_std_mean": 0.19274420142173768, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002335869614034891, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002335869614034891, "signal/frontier_ece_reward/centered_abs_mean": 0.008784758672118188, "signal/frontier_ece_reward/group_std_mean": 0.011800924316048622, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010980948340147735, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010980948340147735, "step": 190 }, { "calibration/aurc": 0.19186302721828583, "calibration/batch_distribution_entropy": 0.7846974041619017, "calibration/buffer_distribution_entropy": 0.8455623219637705, "calibration/confidence_entropy": 0.36892445483210545, "calibration/coverage@0%": 0.011578947368421053, "calibration/coverage@1%": 0.011578947368421053, "calibration/coverage@10%": 0.28315277511204595, "calibration/coverage@15%": 0.4408358028815268, "calibration/coverage@20%": 0.5483091688800866, "calibration/coverage@25%": 0.6822674896809898, "calibration/coverage@30%": 0.8453754176191314, "calibration/coverage@5%": 0.088427923594888, "calibration/ece": 0.12225029143210824, "calibration/mean_confidence": 0.7202651758038046, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011892361111111116, "completions/max_length": 3729.2, "completions/max_terminated_length": 3729.2, "completions/mean_length": 1130.487158203125, "completions/mean_terminated_length": 1144.25771484375, "completions/min_length": 0.0, "completions/min_terminated_length": 268.8, "epoch": 0.46799415007312406, "grad_norm": 0.00035123800626024604, "learning_rate": 3.91566265060241e-07, "loss": -0.0106, "num_tokens": 598997665.0, "reward": 1.0558034181594849, "reward_std": 0.13249356299638748, "rewards/accuracy_reward": 0.676649296283722, "rewards/brier_reward": 0.7974535226821899, "rewards/confidence_uniqueness_reward": 0.9281538367271424, "rewards/format_reward": 0.9881076335906982, "rewards/frontier_aurc_reward": -0.002629127446562052, "rewards/frontier_coverage_1": 0.0432399183511734, "rewards/frontier_coverage_10": 0.043241331726312636, "rewards/frontier_coverage_15": 0.04129153192043304, "rewards/frontier_coverage_20": 0.04897095337510109, "rewards/frontier_coverage_25": 0.17610829174518586, "rewards/frontier_coverage_5": 0.0432399183511734, "rewards/frontier_ece_reward": 0.005448419880121946, "signal/accuracy_reward/centered_abs_mean": 0.1535861536860466, "signal/accuracy_reward/group_std_mean": 0.20312528014183046, "signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0767930768430233, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0767930768430233, "signal/advantage_abs_mean": 0.09537107646465301, "signal/advantage_pre_scale_abs_mean": 0.09537107646465301, "signal/advantage_pre_scale_std": 0.1679329752922058, "signal/advantage_std": 0.1679329752922058, "signal/brier_reward/centered_abs_mean": 0.15391360223293304, "signal/brier_reward/group_std_mean": 0.2022032171487808, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01923920027911663, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01923920027911663, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.041540400683879854, "signal/confidence_uniqueness_reward/group_std_mean": 0.06364303082227707, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005192550085484982, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005192550085484982, "signal/format_reward/centered_abs_mean": 0.020144314132630826, "signal/format_reward/group_std_mean": 0.03839278891682625, "signal/format_reward/group_zero_std_frac": 0.8444444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010072157066315413, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010072157066315413, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032725514844059943, "signal/frontier_aurc_reward/group_std_mean": 0.005476425681263208, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.857866490259767e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.857866490259767e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14388231039047242, "signal/frontier_coverage_1/group_std_mean": 0.21113406419754027, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00257549318484962, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00257549318484962, "signal/frontier_coverage_10/centered_abs_mean": 0.14277739822864532, "signal/frontier_coverage_10/group_std_mean": 0.20962927639484405, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002555715246126056, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002555715246126056, "signal/frontier_coverage_15/centered_abs_mean": 0.12085210084915161, "signal/frontier_coverage_15/group_std_mean": 0.17947884798049926, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021632524440065026, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021632524440065026, "signal/frontier_coverage_20/centered_abs_mean": 0.07110893502831458, "signal/frontier_coverage_20/group_std_mean": 0.09753008931875229, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012728499248623848, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012728499248623848, "signal/frontier_coverage_25/centered_abs_mean": 0.13257131576538086, "signal/frontier_coverage_25/group_std_mean": 0.16897372305393218, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023730265442281962, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023730265442281962, "signal/frontier_coverage_5/centered_abs_mean": 0.14388231039047242, "signal/frontier_coverage_5/group_std_mean": 0.21113406419754027, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00257549318484962, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00257549318484962, "signal/frontier_ece_reward/centered_abs_mean": 0.009696776419878006, "signal/frontier_ece_reward/group_std_mean": 0.013027152419090271, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012120970524847508, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012120970524847508, "step": 195 }, { "calibration/aurc": 0.15151234797804142, "calibration/batch_distribution_entropy": 0.735099965996475, "calibration/buffer_distribution_entropy": 0.8387114189183977, "calibration/confidence_entropy": 0.37467308471707284, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.33631391158785745, "calibration/coverage@15%": 0.6785776451572796, "calibration/coverage@20%": 0.7966761987649715, "calibration/coverage@25%": 0.8593974056115048, "calibration/coverage@30%": 0.9433862433862433, "calibration/coverage@5%": 0.13511235377562142, "calibration/ece": 0.10459575666056162, "calibration/mean_confidence": 0.7707032420511629, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009114583333333348, "completions/max_length": 3905.4, "completions/max_terminated_length": 3905.4, "completions/mean_length": 1109.669580078125, "completions/mean_terminated_length": 1119.9466552734375, "completions/min_length": 0.0, "completions/min_terminated_length": 277.4, "epoch": 0.47999400007499904, "grad_norm": 0.000335763645125553, "learning_rate": 2.409638554216868e-07, "loss": -0.0077, "num_tokens": 614848866.0, "reward": 1.071366262435913, "reward_std": 0.1267540842294693, "rewards/accuracy_reward": 0.6993055582046509, "rewards/brier_reward": 0.8146753191947937, "rewards/confidence_uniqueness_reward": 0.9305232167243958, "rewards/format_reward": 0.990625, "rewards/frontier_aurc_reward": -0.002462388901039958, "rewards/frontier_coverage_1": 0.04432640373706818, "rewards/frontier_coverage_10": 0.04419338628649712, "rewards/frontier_coverage_15": 0.04251908585429191, "rewards/frontier_coverage_20": 0.05172077566385269, "rewards/frontier_coverage_25": 0.19664104282855988, "rewards/frontier_coverage_5": 0.04432640373706818, "rewards/frontier_ece_reward": 0.005684023071080446, "signal/accuracy_reward/centered_abs_mean": 0.14978298395872117, "signal/accuracy_reward/group_std_mean": 0.1975135862827301, "signal/accuracy_reward/group_zero_std_frac": 0.43888888955116273, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07489149197936058, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07489149197936058, "signal/advantage_abs_mean": 0.09207341372966767, "signal/advantage_pre_scale_abs_mean": 0.09207341372966767, "signal/advantage_pre_scale_std": 0.1636202573776245, "signal/advantage_std": 0.1636202573776245, "signal/brier_reward/centered_abs_mean": 0.14286952763795852, "signal/brier_reward/group_std_mean": 0.18820186257362365, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017858690954744814, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017858690954744814, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03856443092226982, "signal/confidence_uniqueness_reward/group_std_mean": 0.05923122763633728, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0048205538652837275, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0048205538652837275, "signal/format_reward/centered_abs_mean": 0.016167534701526164, "signal/format_reward/group_std_mean": 0.03279493264853954, "signal/format_reward/group_zero_std_frac": 0.8555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008083767350763082, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008083767350763082, "signal/frontier_aurc_reward/centered_abs_mean": 0.003109201230108738, "signal/frontier_aurc_reward/group_std_mean": 0.005194711685180664, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.565470055444166e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.565470055444166e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1378859430551529, "signal/frontier_coverage_1/group_std_mean": 0.20053677260875702, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024681583512574435, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024681583512574435, "signal/frontier_coverage_10/centered_abs_mean": 0.13636807948350907, "signal/frontier_coverage_10/group_std_mean": 0.19854426085948945, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024409884586930274, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024409884586930274, "signal/frontier_coverage_15/centered_abs_mean": 0.11440031677484512, "signal/frontier_coverage_15/group_std_mean": 0.16812679767608643, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002047765627503395, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002047765627503395, "signal/frontier_coverage_20/centered_abs_mean": 0.06679626852273941, "signal/frontier_coverage_20/group_std_mean": 0.09091974049806595, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011956531554460526, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011956531554460526, "signal/frontier_coverage_25/centered_abs_mean": 0.13488344550132753, "signal/frontier_coverage_25/group_std_mean": 0.17190809845924376, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024144135415554045, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024144135415554045, "signal/frontier_coverage_5/centered_abs_mean": 0.1378859430551529, "signal/frontier_coverage_5/group_std_mean": 0.20053677260875702, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024681583512574435, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024681583512574435, "signal/frontier_ece_reward/centered_abs_mean": 0.009192906878888607, "signal/frontier_ece_reward/group_std_mean": 0.01221153773367405, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011491133598610758, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011491133598610758, "step": 200 }, { "epoch": 0.47999400007499904, "eval_calibration/aurc": 0.15298984535701007, "eval_calibration/batch_distribution_entropy": 0.6891585641828614, "eval_calibration/buffer_distribution_entropy": 0.8339226172973552, "eval_calibration/confidence_entropy": 0.34052887972179385, "eval_calibration/coverage@0%": 0.14700940860215053, "eval_calibration/coverage@1%": 0.14700940860215053, "eval_calibration/coverage@10%": 0.4490927419354838, "eval_calibration/coverage@15%": 0.6940524193548386, "eval_calibration/coverage@20%": 0.827116935483871, "eval_calibration/coverage@25%": 0.8949932795698925, "eval_calibration/coverage@30%": 0.9734543010752689, "eval_calibration/coverage@5%": 0.14700940860215053, "eval_calibration/ece": 0.1637371228983639, "eval_calibration/mean_confidence": 0.7425752090908834, "eval_completions/clipped_ratio": 0.016319444444444442, "eval_completions/max_length": 3322.0, "eval_completions/max_terminated_length": 3322.0, "eval_completions/mean_length": 1099.1554361979167, "eval_completions/mean_terminated_length": 1117.191874186198, "eval_completions/min_length": 0.0, "eval_completions/min_terminated_length": 364.8333333333333, "eval_loss": 0.0, "eval_num_tokens": 614848866.0, "eval_reward": 1.0577454765637715, "eval_reward_std": 0.2678757707277934, "eval_rewards/accuracy_reward": 0.6918402711550394, "eval_rewards/brier_reward": 0.8075468341509501, "eval_rewards/confidence_uniqueness_reward": 0.8762139777342478, "eval_rewards/format_reward": 0.9861111044883728, "eval_rewards/frontier_aurc_reward": -0.002283926723369708, "eval_rewards/frontier_coverage_1": 0.04415246595939001, "eval_rewards/frontier_coverage_10": 0.04416414389076332, "eval_rewards/frontier_coverage_15": 0.04174827644601464, "eval_rewards/frontier_coverage_20": 0.051496884475151695, "eval_rewards/frontier_coverage_25": 0.20257077117760977, "eval_rewards/frontier_coverage_5": 0.04415246595939001, "eval_rewards/frontier_ece_reward": 0.005393590700502197, "eval_runtime": 213.6534, "eval_samples_per_second": 4.68, "eval_signal/accuracy_reward/centered_abs_mean": 0.4120551198720932, "eval_signal/accuracy_reward/group_std_mean": 0.459818700949351, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2060275599360466, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2060275599360466, "eval_signal/advantage_abs_mean": 0.2272669697801272, "eval_signal/advantage_pre_scale_abs_mean": 0.2272669697801272, "eval_signal/advantage_pre_scale_std": 0.2672336275378863, "eval_signal/advantage_std": 0.2672336275378863, "eval_signal/brier_reward/centered_abs_mean": 0.23803439736366272, "eval_signal/brier_reward/group_std_mean": 0.30198956032594043, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02975429967045784, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02975429967045784, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06008566605548064, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09701731304327647, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00751070825693508, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00751070825693508, "eval_signal/format_reward/centered_abs_mean": 0.026475694496184587, "eval_signal/format_reward/group_std_mean": 0.06907285718868177, "eval_signal/format_reward/group_zero_std_frac": 0.6388889054457346, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013237847248092294, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.013237847248092294, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003976293024607003, "eval_signal/frontier_aurc_reward/group_std_mean": 0.008325919586544236, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.117564200598281e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.117564200598281e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.1956762745976448, "eval_signal/frontier_coverage_1/group_std_mean": 0.32742465535799664, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035026053277154765, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035026053277154765, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.19371677935123444, "eval_signal/frontier_coverage_10/group_std_mean": 0.3244449843962987, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003467530244961381, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003467530244961381, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.16138583918412527, "eval_signal/frontier_coverage_15/group_std_mean": 0.27520785232385, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028888065523157516, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028888065523157516, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.08761031304796536, "eval_signal/frontier_coverage_20/group_std_mean": 0.13085574780901274, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015682245721109211, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015682245721109211, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.2675568262736003, "eval_signal/frontier_coverage_25/group_std_mean": 0.3178121993939082, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00478926720097661, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00478926720097661, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.1956762745976448, "eval_signal/frontier_coverage_5/group_std_mean": 0.32742465535799664, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035026053277154765, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035026053277154765, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.010918460320681334, "eval_signal/frontier_ece_reward/group_std_mean": 0.0168185291501383, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013648075400851667, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013648075400851667, "eval_steps_per_second": 0.028, "step": 200 }, { "calibration/aurc": 0.18313683195958363, "calibration/batch_distribution_entropy": 0.7309309125681636, "calibration/buffer_distribution_entropy": 0.8321117344650947, "calibration/confidence_entropy": 0.36200907857604947, "calibration/coverage@0%": 0.0058823529411764705, "calibration/coverage@1%": 0.0058823529411764705, "calibration/coverage@10%": 0.188358492065308, "calibration/coverage@15%": 0.5072439560443204, "calibration/coverage@20%": 0.6427671877795685, "calibration/coverage@25%": 0.8473311156960994, "calibration/coverage@30%": 0.9155880247589371, "calibration/coverage@5%": 0.0058823529411764705, "calibration/ece": 0.10859277111780778, "calibration/mean_confidence": 0.7655174734872644, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011545138888888862, "completions/max_length": 3830.8, "completions/max_terminated_length": 3830.8, "completions/mean_length": 1124.1480224609375, "completions/mean_terminated_length": 1137.24013671875, "completions/min_length": 0.0, "completions/min_terminated_length": 313.4, "epoch": 0.491993850076874, "grad_norm": 0.00028088607359677553, "learning_rate": 9.036144578313253e-08, "loss": -0.0101, "num_tokens": 630865003.0, "reward": 1.0949723958969115, "reward_std": 0.12291131764650345, "rewards/accuracy_reward": 0.7439236164093017, "rewards/brier_reward": 0.8352924466133118, "rewards/confidence_uniqueness_reward": 0.9271109342575073, "rewards/format_reward": 0.98828125, "rewards/frontier_aurc_reward": -0.0017190412618219852, "rewards/frontier_coverage_1": 0.03267882689833641, "rewards/frontier_coverage_10": 0.03304030448198318, "rewards/frontier_coverage_15": 0.03327622413635254, "rewards/frontier_coverage_20": 0.057910379767417905, "rewards/frontier_coverage_25": 0.25257430374622347, "rewards/frontier_coverage_5": 0.03267882689833641, "rewards/frontier_ece_reward": 0.005485412012785673, "signal/accuracy_reward/centered_abs_mean": 0.1423394113779068, "signal/accuracy_reward/group_std_mean": 0.1927516996860504, "signal/accuracy_reward/group_zero_std_frac": 0.4333333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0711697056889534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0711697056889534, "signal/advantage_abs_mean": 0.08618586659431457, "signal/advantage_pre_scale_abs_mean": 0.08618586659431457, "signal/advantage_pre_scale_std": 0.16105275750160217, "signal/advantage_std": 0.16105275750160217, "signal/brier_reward/centered_abs_mean": 0.13963095247745513, "signal/brier_reward/group_std_mean": 0.18516895473003386, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01745386905968189, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01745386905968189, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04072900265455246, "signal/confidence_uniqueness_reward/group_std_mean": 0.06390283033251762, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005091125331819058, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005091125331819058, "signal/format_reward/centered_abs_mean": 0.019070095382630825, "signal/format_reward/group_std_mean": 0.03828651420772076, "signal/format_reward/group_zero_std_frac": 0.8333333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009535047691315413, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009535047691315413, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022460675798356535, "signal/frontier_aurc_reward/group_std_mean": 0.003971707401797175, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.020460764877498e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.020460764877498e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1471869170665741, "signal/frontier_coverage_1/group_std_mean": 0.21187746226787568, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026346457190811632, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026346457190811632, "signal/frontier_coverage_10/centered_abs_mean": 0.14501943588256835, "signal/frontier_coverage_10/group_std_mean": 0.2089345246553421, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025958478916436436, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025958478916436436, "signal/frontier_coverage_15/centered_abs_mean": 0.11886921375989914, "signal/frontier_coverage_15/group_std_mean": 0.17266935706138611, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021277590189129115, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021277590189129115, "signal/frontier_coverage_20/centered_abs_mean": 0.07114373296499252, "signal/frontier_coverage_20/group_std_mean": 0.09532740265130997, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012734727468341589, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012734727468341589, "signal/frontier_coverage_25/centered_abs_mean": 0.1383225828409195, "signal/frontier_coverage_25/group_std_mean": 0.1785325288772583, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002475974103435874, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002475974103435874, "signal/frontier_coverage_5/centered_abs_mean": 0.1471869170665741, "signal/frontier_coverage_5/group_std_mean": 0.21187746226787568, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026346457190811632, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026346457190811632, "signal/frontier_ece_reward/centered_abs_mean": 0.009521047584712505, "signal/frontier_ece_reward/group_std_mean": 0.012699404545128346, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011901309480890632, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011901309480890632, "step": 205 }, { "calibration/aurc": 0.15099192703946585, "calibration/batch_distribution_entropy": 0.7362200352954403, "calibration/buffer_distribution_entropy": 0.8299266209675906, "calibration/confidence_entropy": 0.3875184856441491, "calibration/coverage@0%": 0.03849518810148731, "calibration/coverage@1%": 0.03849518810148731, "calibration/coverage@10%": 0.2589676290463692, "calibration/coverage@15%": 0.4965119491642492, "calibration/coverage@20%": 0.8815515350877193, "calibration/coverage@25%": 0.9703399122807017, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.07611548556430446, "calibration/ece": 0.08016838343231748, "calibration/mean_confidence": 0.771572898167351, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006655092592592597, "completions/max_length": 3825.3333333333335, "completions/max_terminated_length": 3825.3333333333335, "completions/mean_length": 1118.4932250976562, "completions/mean_terminated_length": 1126.0267740885417, "completions/min_length": 0.0, "completions/min_terminated_length": 286.6666666666667, "epoch": 0.49919376007799904, "num_tokens": 640462076.0, "reward": 1.0744483868281047, "reward_std": 0.12696021795272827, "rewards/accuracy_reward": 0.7048611044883728, "rewards/brier_reward": 0.8086513876914978, "rewards/confidence_uniqueness_reward": 0.934039036432902, "rewards/format_reward": 0.9932002226511637, "rewards/frontier_aurc_reward": -0.002406407419281701, "rewards/frontier_coverage_1": 0.030984345202644665, "rewards/frontier_coverage_10": 0.030985131859779358, "rewards/frontier_coverage_15": 0.03134462299446265, "rewards/frontier_coverage_20": 0.04987387855847677, "rewards/frontier_coverage_25": 0.21809764703114828, "rewards/frontier_coverage_5": 0.030984345202644665, "rewards/frontier_ece_reward": 0.004822776031990846, "signal/accuracy_reward/centered_abs_mean": 0.1598126416405042, "signal/accuracy_reward/group_std_mean": 0.2098775009314219, "signal/accuracy_reward/group_zero_std_frac": 0.407407412926356, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0799063208202521, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0799063208202521, "signal/advantage_abs_mean": 0.09329408903916676, "signal/advantage_pre_scale_abs_mean": 0.09329408903916676, "signal/advantage_pre_scale_std": 0.16142626603444418, "signal/advantage_std": 0.16142626603444418, "signal/brier_reward/centered_abs_mean": 0.14758913467327753, "signal/brier_reward/group_std_mean": 0.18945661187171936, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01844864183415969, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01844864183415969, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03421806792418162, "signal/confidence_uniqueness_reward/group_std_mean": 0.05302715301513672, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004277258490522702, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004277258490522702, "signal/format_reward/centered_abs_mean": 0.01250542514026165, "signal/format_reward/group_std_mean": 0.027382840712865193, "signal/format_reward/group_zero_std_frac": 0.875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006252712570130825, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006252712570130825, "signal/frontier_aurc_reward/centered_abs_mean": 0.0030548664896438518, "signal/frontier_aurc_reward/group_std_mean": 0.005336549288282792, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.468210899077045e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.468210899077045e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14464224129915237, "signal/frontier_coverage_1/group_std_mean": 0.20606282353401184, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002589096004764239, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002589096004764239, "signal/frontier_coverage_10/centered_abs_mean": 0.14266284555196762, "signal/frontier_coverage_10/group_std_mean": 0.20341384410858154, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002553664923955997, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002553664923955997, "signal/frontier_coverage_15/centered_abs_mean": 0.11402915418148041, "signal/frontier_coverage_15/group_std_mean": 0.1643607368071874, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020411216343442598, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020411216343442598, "signal/frontier_coverage_20/centered_abs_mean": 0.07020012413462003, "signal/frontier_coverage_20/group_std_mean": 0.09288557122151057, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001256582133161525, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001256582133161525, "signal/frontier_coverage_25/centered_abs_mean": 0.15206469098726907, "signal/frontier_coverage_25/group_std_mean": 0.19445708394050598, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027219578623771667, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027219578623771667, "signal/frontier_coverage_5/centered_abs_mean": 0.14464224129915237, "signal/frontier_coverage_5/group_std_mean": 0.20606282353401184, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002589096004764239, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002589096004764239, "signal/frontier_ece_reward/centered_abs_mean": 0.009401047912736734, "signal/frontier_ece_reward/group_std_mean": 0.012525786645710468, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011751309890920918, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011751309890920918, "step": 208, "total_flos": 0.0, "train_loss": -0.015914715025036667, "train_runtime": 51338.4996, "train_samples_per_second": 0.292, "train_steps_per_second": 0.004 } ], "logging_steps": 5, "max_steps": 208, "num_input_tokens_seen": 640462076, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }