{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.49919376007799904, "eval_steps": 50, "global_step": 208, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.5102160159904601, "calibration/batch_distribution_entropy": 0.2895044023882817, "calibration/batch_entropy_100bins": 0.354793971262317, "calibration/batch_entropy_10bins": 0.2895044023882817, "calibration/batch_entropy_50bins": 0.4107426911912116, "calibration/batch_uniqueness": 0.5093545043561868, "calibration/confidence_entropy": 0.22127973516932448, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.46712570461757996, "calibration/mean_confidence": 0.9131820617407878, "calibration/prompt_uniqueness": 0.3733117777662954, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021788194444444464, "completions/max_length": 4062.8, "completions/max_terminated_length": 4062.8, "completions/mean_length": 515.3322021484375, "completions/mean_terminated_length": 526.8361450195313, "completions/min_length": 0.0, "completions/min_terminated_length": 2.8, "epoch": 0.011999850001874977, "grad_norm": 0.00417915266007185, "learning_rate": 5.952380952380953e-07, "loss": 0.0033, "num_tokens": 9050835.0, "reward": 0.4895077347755432, "reward_std": 0.4509033739566803, "rewards/accuracy_reward": 0.26111110746860505, "rewards/brier_reward": 0.3134358525276184, "rewards/confidence_uniqueness_reward": 0.292756462097168, "rewards/format_reward": 0.6008680462837219, "rewards/frontier_aurc_reward": 0.2758327066898346, "rewards/frontier_coverage_0": 0.2758327066898346, "rewards/frontier_coverage_1": 0.2758327066898346, "rewards/frontier_coverage_10": 0.2758327066898346, "rewards/frontier_coverage_15": 0.2758327066898346, "rewards/frontier_coverage_20": 0.2758327066898346, "rewards/frontier_coverage_25": 0.2758327066898346, "rewards/frontier_coverage_5": 0.2758327066898346, "rewards/frontier_ece_reward": 0.2758327066898346, "rewards/frontier_entropy_batch_reward": -0.5726763129234314, "signal/accuracy_reward/centered_abs_mean": 0.30725911259651184, "signal/accuracy_reward/group_bin_occupancy": 0.23958333333333334, "signal/accuracy_reward/group_std_mean": 0.3683225452899933, "signal/accuracy_reward/group_zero_std_frac": 0.08333333507180214, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15362955629825592, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15362955629825592, "signal/advantage_abs_mean": 0.3866611957550049, "signal/advantage_pre_scale_abs_mean": 0.3866611957550049, "signal/advantage_pre_scale_std": 0.45703948736190797, "signal/advantage_std": 0.45703948736190797, "signal/brier_reward/centered_abs_mean": 0.3180504024028778, "signal/brier_reward/group_bin_occupancy": 0.5190972222222222, "signal/brier_reward/group_std_mean": 0.371950763463974, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03180503956973553, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03180503956973553, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.23733226656913758, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6104166666666666, "signal/confidence_uniqueness_reward/group_std_mean": 0.2896383464336395, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023733228072524072, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023733228072524072, "signal/format_reward/centered_abs_mean": 0.4396592855453491, "signal/format_reward/group_bin_occupancy": 0.25, "signal/format_reward/group_std_mean": 0.47461998462677, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.21982964277267455, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.21982964277267455, "signal/frontier_aurc_reward/centered_abs_mean": 0.3085459768772125, "signal/frontier_aurc_reward/group_bin_occupancy": 0.3993055555555555, "signal/frontier_aurc_reward/group_std_mean": 0.36748775839805603, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003856824804097414, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003856824804097414, "signal/frontier_coverage_0/centered_abs_mean": 0.3085459768772125, "signal/frontier_coverage_0/group_bin_occupancy": 0.3993055555555555, "signal/frontier_coverage_0/group_std_mean": 0.36748775839805603, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003856824804097414, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003856824804097414, "signal/frontier_coverage_1/centered_abs_mean": 0.3085459768772125, "signal/frontier_coverage_1/group_bin_occupancy": 0.3993055555555555, "signal/frontier_coverage_1/group_std_mean": 0.36748775839805603, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003856824804097414, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003856824804097414, "signal/frontier_coverage_10/centered_abs_mean": 0.3085459768772125, "signal/frontier_coverage_10/group_bin_occupancy": 0.3993055555555555, "signal/frontier_coverage_10/group_std_mean": 0.36748775839805603, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003856824804097414, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003856824804097414, "signal/frontier_coverage_15/centered_abs_mean": 0.3085459768772125, "signal/frontier_coverage_15/group_bin_occupancy": 0.3993055555555555, "signal/frontier_coverage_15/group_std_mean": 0.36748775839805603, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003856824804097414, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003856824804097414, "signal/frontier_coverage_20/centered_abs_mean": 0.3085459768772125, "signal/frontier_coverage_20/group_bin_occupancy": 0.3993055555555555, "signal/frontier_coverage_20/group_std_mean": 0.36748775839805603, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003856824804097414, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003856824804097414, "signal/frontier_coverage_25/centered_abs_mean": 0.3085459768772125, "signal/frontier_coverage_25/group_bin_occupancy": 0.3993055555555555, "signal/frontier_coverage_25/group_std_mean": 0.36748775839805603, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003856824804097414, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003856824804097414, "signal/frontier_coverage_5/centered_abs_mean": 0.3085459768772125, "signal/frontier_coverage_5/group_bin_occupancy": 0.3993055555555555, "signal/frontier_coverage_5/group_std_mean": 0.36748775839805603, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003856824804097414, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003856824804097414, "signal/frontier_ece_reward/centered_abs_mean": 0.3085459768772125, "signal/frontier_ece_reward/group_bin_occupancy": 0.3993055555555555, "signal/frontier_ece_reward/group_std_mean": 0.36748775839805603, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030854598432779313, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030854598432779313, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.44998674988746645, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2989583333333333, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4826855003833771, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04499867707490921, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04499867707490921, "step": 5 }, { "calibration/aurc": 0.5014903573254282, "calibration/batch_distribution_entropy": 0.2653753261224933, "calibration/batch_entropy_100bins": 0.3489320427418314, "calibration/batch_entropy_10bins": 0.2653753261224933, "calibration/batch_entropy_50bins": 0.4013330765192264, "calibration/batch_uniqueness": 0.507430794053826, "calibration/confidence_entropy": 0.21812283334008473, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4576217089358855, "calibration/mean_confidence": 0.9197730529173931, "calibration/prompt_uniqueness": 0.38747456446429124, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018315972222222233, "completions/max_length": 3999.2, "completions/max_terminated_length": 3999.2, "completions/mean_length": 479.84349365234374, "completions/mean_terminated_length": 488.94312744140626, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.023999700003749954, "grad_norm": 0.0026754760183393955, "learning_rate": 1.1904761904761906e-06, "loss": -0.0003, "num_tokens": 17661352.0, "reward": 0.5657774209976196, "reward_std": 0.43071451783180237, "rewards/accuracy_reward": 0.29192708134651185, "rewards/brier_reward": 0.35417120456695556, "rewards/confidence_uniqueness_reward": 0.3518584191799164, "rewards/format_reward": 0.7115451455116272, "rewards/frontier_aurc_reward": 0.3074465751647949, "rewards/frontier_coverage_0": 0.3074465751647949, "rewards/frontier_coverage_1": 0.3074465751647949, "rewards/frontier_coverage_10": 0.3074465751647949, "rewards/frontier_coverage_15": 0.3074465751647949, "rewards/frontier_coverage_20": 0.3074465751647949, "rewards/frontier_coverage_25": 0.3074465751647949, "rewards/frontier_coverage_5": 0.3074465751647949, "rewards/frontier_ece_reward": 0.3074465751647949, "rewards/frontier_entropy_batch_reward": -0.6805100202560425, "signal/accuracy_reward/centered_abs_mean": 0.326611328125, "signal/accuracy_reward/group_bin_occupancy": 0.2420138888888889, "signal/accuracy_reward/group_std_mean": 0.38544551730155946, "signal/accuracy_reward/group_zero_std_frac": 0.06388889066874981, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1633056640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1633056640625, "signal/advantage_abs_mean": 0.35993013381958006, "signal/advantage_pre_scale_abs_mean": 0.35993013381958006, "signal/advantage_pre_scale_std": 0.43645923137664794, "signal/advantage_std": 0.43645923137664794, "signal/brier_reward/centered_abs_mean": 0.3224785506725311, "signal/brier_reward/group_bin_occupancy": 0.5399305555555556, "signal/brier_reward/group_std_mean": 0.3748953461647034, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03224785625934601, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03224785625934601, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2230827957391739, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6263888888888889, "signal/confidence_uniqueness_reward/group_std_mean": 0.27950537800788877, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.002777777798473835, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022308281064033507, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022308281064033507, "signal/format_reward/centered_abs_mean": 0.3555935323238373, "signal/format_reward/group_bin_occupancy": 0.2489583333333333, "signal/format_reward/group_std_mean": 0.42049226760864256, "signal/format_reward/group_zero_std_frac": 0.008333333395421505, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.17779676616191864, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.17779676616191864, "signal/frontier_aurc_reward/centered_abs_mean": 0.32133702635765077, "signal/frontier_aurc_reward/group_bin_occupancy": 0.41215277777777787, "signal/frontier_aurc_reward/group_std_mean": 0.3776120483875275, "signal/frontier_aurc_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.004016713052988052, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.004016713052988052, "signal/frontier_coverage_0/centered_abs_mean": 0.32133702635765077, "signal/frontier_coverage_0/group_bin_occupancy": 0.41215277777777787, "signal/frontier_coverage_0/group_std_mean": 0.3776120483875275, "signal/frontier_coverage_0/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004016713052988052, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004016713052988052, "signal/frontier_coverage_1/centered_abs_mean": 0.32133702635765077, "signal/frontier_coverage_1/group_bin_occupancy": 0.41215277777777787, "signal/frontier_coverage_1/group_std_mean": 0.3776120483875275, "signal/frontier_coverage_1/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004016713052988052, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004016713052988052, "signal/frontier_coverage_10/centered_abs_mean": 0.32133702635765077, "signal/frontier_coverage_10/group_bin_occupancy": 0.41215277777777787, "signal/frontier_coverage_10/group_std_mean": 0.3776120483875275, "signal/frontier_coverage_10/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004016713052988052, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004016713052988052, "signal/frontier_coverage_15/centered_abs_mean": 0.32133702635765077, "signal/frontier_coverage_15/group_bin_occupancy": 0.41215277777777787, "signal/frontier_coverage_15/group_std_mean": 0.3776120483875275, "signal/frontier_coverage_15/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004016713052988052, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004016713052988052, "signal/frontier_coverage_20/centered_abs_mean": 0.32133702635765077, "signal/frontier_coverage_20/group_bin_occupancy": 0.41215277777777787, "signal/frontier_coverage_20/group_std_mean": 0.3776120483875275, "signal/frontier_coverage_20/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004016713052988052, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004016713052988052, "signal/frontier_coverage_25/centered_abs_mean": 0.32133702635765077, "signal/frontier_coverage_25/group_bin_occupancy": 0.41215277777777787, "signal/frontier_coverage_25/group_std_mean": 0.3776120483875275, "signal/frontier_coverage_25/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004016713052988052, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004016713052988052, "signal/frontier_coverage_5/centered_abs_mean": 0.32133702635765077, "signal/frontier_coverage_5/group_bin_occupancy": 0.41215277777777787, "signal/frontier_coverage_5/group_std_mean": 0.3776120483875275, "signal/frontier_coverage_5/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004016713052988052, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004016713052988052, "signal/frontier_ece_reward/centered_abs_mean": 0.32133702635765077, "signal/frontier_ece_reward/group_bin_occupancy": 0.41215277777777787, "signal/frontier_ece_reward/group_std_mean": 0.3776120483875275, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03213370442390442, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03213370442390442, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.380006468296051, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3114583333333333, "signal/frontier_entropy_batch_reward/group_std_mean": 0.44059685468673704, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.038000645488500594, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038000645488500594, "step": 10 }, { "calibration/aurc": 0.5613535708127804, "calibration/batch_distribution_entropy": 0.30285369677867097, "calibration/batch_entropy_100bins": 0.366809675757525, "calibration/batch_entropy_10bins": 0.30285369677867097, "calibration/batch_entropy_50bins": 0.42733167303489505, "calibration/batch_uniqueness": 0.5391804649021238, "calibration/confidence_entropy": 0.24734191433149855, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5070262821896179, "calibration/mean_confidence": 0.9101403023276001, "calibration/prompt_uniqueness": 0.44865000281603207, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011284722222222255, "completions/max_length": 3958.4, "completions/max_terminated_length": 3958.4, "completions/mean_length": 423.924658203125, "completions/mean_terminated_length": 428.80230712890625, "completions/min_length": 0.0, "completions/min_terminated_length": 37.8, "epoch": 0.03599955000562493, "grad_norm": 0.0023876321502029896, "learning_rate": 1.7857142857142859e-06, "loss": -0.0106, "num_tokens": 25646948.0, "reward": 0.6934881210327148, "reward_std": 0.3449582040309906, "rewards/accuracy_reward": 0.31979166269302367, "rewards/brier_reward": 0.42017869353294374, "rewards/confidence_uniqueness_reward": 0.49401273131370543, "rewards/format_reward": 0.9217881917953491, "rewards/frontier_aurc_reward": 0.34637343883514404, "rewards/frontier_coverage_0": 0.34637343883514404, "rewards/frontier_coverage_1": 0.34637343883514404, "rewards/frontier_coverage_10": 0.34637343883514404, "rewards/frontier_coverage_15": 0.34637343883514404, "rewards/frontier_coverage_20": 0.34637343883514404, "rewards/frontier_coverage_25": 0.34637343883514404, "rewards/frontier_coverage_5": 0.34637343883514404, "rewards/frontier_ece_reward": 0.34637343883514404, "rewards/frontier_entropy_batch_reward": -0.8799565434455872, "signal/accuracy_reward/centered_abs_mean": 0.31633029580116273, "signal/accuracy_reward/group_bin_occupancy": 0.23958333333333331, "signal/accuracy_reward/group_std_mean": 0.37618979811668396, "signal/accuracy_reward/group_zero_std_frac": 0.0833333358168602, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15816514790058137, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15816514790058137, "signal/advantage_abs_mean": 0.2800732344388962, "signal/advantage_pre_scale_abs_mean": 0.2800732344388962, "signal/advantage_pre_scale_std": 0.3535742461681366, "signal/advantage_std": 0.3535742461681366, "signal/brier_reward/centered_abs_mean": 0.29867386221885683, "signal/brier_reward/group_bin_occupancy": 0.5961805555555555, "signal/brier_reward/group_std_mean": 0.35164528489112856, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02986738607287407, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02986738607287407, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.17988529205322265, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6309027777777778, "signal/confidence_uniqueness_reward/group_std_mean": 0.23147567808628083, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.017988529428839682, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.017988529428839682, "signal/format_reward/centered_abs_mean": 0.1297905795276165, "signal/format_reward/group_bin_occupancy": 0.21909722222222222, "signal/format_reward/group_std_mean": 0.21357380449771882, "signal/format_reward/group_zero_std_frac": 0.24722222574055194, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06489528976380825, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.06489528976380825, "signal/frontier_aurc_reward/centered_abs_mean": 0.30928654670715333, "signal/frontier_aurc_reward/group_bin_occupancy": 0.46701388888888895, "signal/frontier_aurc_reward/group_std_mean": 0.366342568397522, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038660819176584483, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038660819176584483, "signal/frontier_coverage_0/centered_abs_mean": 0.30928654670715333, "signal/frontier_coverage_0/group_bin_occupancy": 0.46701388888888895, "signal/frontier_coverage_0/group_std_mean": 0.366342568397522, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0038660819176584483, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0038660819176584483, "signal/frontier_coverage_1/centered_abs_mean": 0.30928654670715333, "signal/frontier_coverage_1/group_bin_occupancy": 0.46701388888888895, "signal/frontier_coverage_1/group_std_mean": 0.366342568397522, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038660819176584483, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038660819176584483, "signal/frontier_coverage_10/centered_abs_mean": 0.30928654670715333, "signal/frontier_coverage_10/group_bin_occupancy": 0.46701388888888895, "signal/frontier_coverage_10/group_std_mean": 0.366342568397522, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038660819176584483, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038660819176584483, "signal/frontier_coverage_15/centered_abs_mean": 0.30928654670715333, "signal/frontier_coverage_15/group_bin_occupancy": 0.46701388888888895, "signal/frontier_coverage_15/group_std_mean": 0.366342568397522, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038660819176584483, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038660819176584483, "signal/frontier_coverage_20/centered_abs_mean": 0.30928654670715333, "signal/frontier_coverage_20/group_bin_occupancy": 0.46701388888888895, "signal/frontier_coverage_20/group_std_mean": 0.366342568397522, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038660819176584483, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038660819176584483, "signal/frontier_coverage_25/centered_abs_mean": 0.30928654670715333, "signal/frontier_coverage_25/group_bin_occupancy": 0.46701388888888895, "signal/frontier_coverage_25/group_std_mean": 0.366342568397522, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038660819176584483, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038660819176584483, "signal/frontier_coverage_5/centered_abs_mean": 0.30928654670715333, "signal/frontier_coverage_5/group_bin_occupancy": 0.46701388888888895, "signal/frontier_coverage_5/group_std_mean": 0.366342568397522, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038660819176584483, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038660819176584483, "signal/frontier_ece_reward/centered_abs_mean": 0.30928654670715333, "signal/frontier_ece_reward/group_bin_occupancy": 0.46701388888888895, "signal/frontier_ece_reward/group_std_mean": 0.366342568397522, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030928655341267586, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030928655341267586, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1947160005569458, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.31145833333333334, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3006825089454651, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.07222222201526166, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019471600651741028, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019471600651741028, "step": 15 }, { "calibration/aurc": 0.44435261205240256, "calibration/batch_distribution_entropy": 0.3935012421540486, "calibration/batch_entropy_100bins": 0.3987097622781842, "calibration/batch_entropy_10bins": 0.3935012421540486, "calibration/batch_entropy_50bins": 0.46414418845684446, "calibration/batch_uniqueness": 0.5991141979420094, "calibration/buffer_distribution_entropy": 0.3111829772402379, "calibration/buffer_entropy_100bins": 0.37510196723994477, "calibration/buffer_entropy_10bins": 0.3111829772402379, "calibration/buffer_entropy_50bins": 0.4347081916262859, "calibration/confidence_entropy": 0.3047036317626108, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.10091759638224643, "calibration/coverage@5%": 0.0, "calibration/ece": 0.38096295336946745, "calibration/mean_confidence": 0.8873765282761564, "calibration/prompt_uniqueness": 0.5068829583550546, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01050347222222221, "completions/max_length": 3976.8, "completions/max_terminated_length": 3976.8, "completions/mean_length": 469.0471435546875, "completions/mean_terminated_length": 474.0674682617188, "completions/min_length": 0.0, "completions/min_terminated_length": 92.8, "epoch": 0.04799940000749991, "grad_norm": 0.001804217929020524, "learning_rate": 2.380952380952381e-06, "loss": -0.0096, "num_tokens": 34164067.0, "reward": 0.7753249049186707, "reward_std": 0.2653871446847916, "rewards/accuracy_reward": 0.44453125, "rewards/brier_reward": 0.5578694939613342, "rewards/confidence_uniqueness_reward": 0.5910940289497375, "rewards/format_reward": 0.9848958373069763, "rewards/frontier_aurc_reward": 0.1972955190576613, "rewards/frontier_coverage_0": 0.20763492183759807, "rewards/frontier_coverage_1": 0.20763492183759807, "rewards/frontier_coverage_10": 0.20763492183759807, "rewards/frontier_coverage_15": 0.20763492183759807, "rewards/frontier_coverage_20": 0.20763492183759807, "rewards/frontier_coverage_25": 0.20763492183759807, "rewards/frontier_coverage_5": 0.20763492183759807, "rewards/frontier_ece_reward": 0.19086738899350167, "rewards/frontier_entropy_batch_reward": -0.9400596499443055, "signal/accuracy_reward/centered_abs_mean": 0.29937608242034913, "signal/accuracy_reward/group_bin_occupancy": 0.2409722222222222, "signal/accuracy_reward/group_std_mean": 0.36640662550926206, "signal/accuracy_reward/group_zero_std_frac": 0.07222222425043583, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14968804121017457, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14968804121017457, "signal/advantage_abs_mean": 0.21280945539474488, "signal/advantage_pre_scale_abs_mean": 0.21280945539474488, "signal/advantage_pre_scale_std": 0.274143123626709, "signal/advantage_std": 0.274143123626709, "signal/brier_reward/centered_abs_mean": 0.2618247151374817, "signal/brier_reward/group_bin_occupancy": 0.6493055555555556, "signal/brier_reward/group_std_mean": 0.3188887655735016, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026182472333312036, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.026182472333312036, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1749788463115692, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6107638888888889, "signal/confidence_uniqueness_reward/group_std_mean": 0.2077195405960083, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01749788485467434, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01749788485467434, "signal/format_reward/centered_abs_mean": 0.02798394113779068, "signal/format_reward/group_bin_occupancy": 0.16215277777777776, "signal/format_reward/group_std_mean": 0.06326771751046181, "signal/format_reward/group_zero_std_frac": 0.7027777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01399197056889534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01399197056889534, "signal/frontier_aurc_reward/centered_abs_mean": 0.12959627383388578, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6298611111111111, "signal/frontier_aurc_reward/group_std_mean": 0.15695078764110804, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0016199534831685015, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0016199534831685015, "signal/frontier_coverage_0/centered_abs_mean": 0.1446688774973154, "signal/frontier_coverage_0/group_bin_occupancy": 0.61875, "signal/frontier_coverage_0/group_std_mean": 0.18371742591261864, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001808361045550555, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001808361045550555, "signal/frontier_coverage_1/centered_abs_mean": 0.1446688774973154, "signal/frontier_coverage_1/group_bin_occupancy": 0.61875, "signal/frontier_coverage_1/group_std_mean": 0.18371742591261864, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001808361045550555, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001808361045550555, "signal/frontier_coverage_10/centered_abs_mean": 0.1446688774973154, "signal/frontier_coverage_10/group_bin_occupancy": 0.61875, "signal/frontier_coverage_10/group_std_mean": 0.18371742591261864, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001808361045550555, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001808361045550555, "signal/frontier_coverage_15/centered_abs_mean": 0.1446688774973154, "signal/frontier_coverage_15/group_bin_occupancy": 0.61875, "signal/frontier_coverage_15/group_std_mean": 0.18371742591261864, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001808361045550555, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001808361045550555, "signal/frontier_coverage_20/centered_abs_mean": 0.1446688774973154, "signal/frontier_coverage_20/group_bin_occupancy": 0.61875, "signal/frontier_coverage_20/group_std_mean": 0.18371742591261864, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001808361045550555, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001808361045550555, "signal/frontier_coverage_25/centered_abs_mean": 0.1446688774973154, "signal/frontier_coverage_25/group_bin_occupancy": 0.61875, "signal/frontier_coverage_25/group_std_mean": 0.18371742591261864, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001808361045550555, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001808361045550555, "signal/frontier_coverage_5/centered_abs_mean": 0.1446688774973154, "signal/frontier_coverage_5/group_bin_occupancy": 0.61875, "signal/frontier_coverage_5/group_std_mean": 0.18371742591261864, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001808361045550555, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001808361045550555, "signal/frontier_ece_reward/centered_abs_mean": 0.21210699677467346, "signal/frontier_ece_reward/group_bin_occupancy": 0.5527777777777778, "signal/frontier_ece_reward/group_std_mean": 0.2537235528230667, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.021210700459778308, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.021210700459778308, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10517127364873886, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2833333333333333, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2029697299003601, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.24166667461395264, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010517127625644208, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010517127625644208, "step": 20 }, { "calibration/aurc": 0.3473902280376032, "calibration/batch_distribution_entropy": 0.5674422584825392, "calibration/batch_entropy_100bins": 0.45404012302736235, "calibration/batch_entropy_10bins": 0.5674422584825392, "calibration/batch_entropy_50bins": 0.5285762138333968, "calibration/batch_uniqueness": 0.6932638785134733, "calibration/buffer_distribution_entropy": 0.35995197977266097, "calibration/buffer_entropy_100bins": 0.39751875483045446, "calibration/buffer_entropy_10bins": 0.35995197977266097, "calibration/buffer_entropy_50bins": 0.46073419347011135, "calibration/confidence_entropy": 0.36861193170010964, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.06354166666666666, "calibration/coverage@20%": 0.17879150175901498, "calibration/coverage@25%": 0.3004287598944591, "calibration/coverage@30%": 0.4125326370757181, "calibration/coverage@5%": 0.0, "calibration/ece": 0.2454035888451538, "calibration/mean_confidence": 0.8470411999313076, "calibration/prompt_uniqueness": 0.5898410725874782, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01206597222222221, "completions/max_length": 3880.8, "completions/max_terminated_length": 3880.8, "completions/mean_length": 531.1012268066406, "completions/mean_terminated_length": 537.5916625976563, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.05999925000937488, "grad_norm": 0.0007845126674510539, "learning_rate": 2.9761904761904763e-06, "loss": -0.0073, "num_tokens": 43406801.0, "reward": 0.8140245795249939, "reward_std": 0.20924938917160035, "rewards/accuracy_reward": 0.5595486044883728, "rewards/brier_reward": 0.6664814233779908, "rewards/confidence_uniqueness_reward": 0.6804561376571655, "rewards/format_reward": 0.9850694537162781, "rewards/frontier_aurc_reward": -0.004134485684335232, "rewards/frontier_coverage_0": 0.001367491763085127, "rewards/frontier_coverage_1": 0.001367491763085127, "rewards/frontier_coverage_10": 0.001367491763085127, "rewards/frontier_coverage_15": 0.001367491763085127, "rewards/frontier_coverage_20": 0.001367491763085127, "rewards/frontier_coverage_25": 0.001367491763085127, "rewards/frontier_coverage_5": 0.001367491763085127, "rewards/frontier_ece_reward": 0.012747335172025486, "rewards/frontier_entropy_batch_reward": -0.9432091236114502, "signal/accuracy_reward/centered_abs_mean": 0.26035155951976774, "signal/accuracy_reward/group_bin_occupancy": 0.23125, "signal/accuracy_reward/group_std_mean": 0.3247913300991058, "signal/accuracy_reward/group_zero_std_frac": 0.1500000014901161, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13017577975988387, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.13017577975988387, "signal/advantage_abs_mean": 0.16362378895282745, "signal/advantage_pre_scale_abs_mean": 0.16362378895282745, "signal/advantage_pre_scale_std": 0.22541998326778412, "signal/advantage_std": 0.22541998326778412, "signal/brier_reward/centered_abs_mean": 0.20725049078464508, "signal/brier_reward/group_bin_occupancy": 0.7125, "signal/brier_reward/group_std_mean": 0.25996835231781007, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020725049078464508, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020725049078464508, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.13917125761508942, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6472222222222224, "signal/confidence_uniqueness_reward/group_std_mean": 0.16648139357566832, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013917125947773456, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013917125947773456, "signal/format_reward/centered_abs_mean": 0.02619357593357563, "signal/format_reward/group_bin_occupancy": 0.15347222222222223, "signal/format_reward/group_std_mean": 0.0523154728114605, "signal/format_reward/group_zero_std_frac": 0.7722222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013096787966787815, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013096787966787815, "signal/frontier_aurc_reward/centered_abs_mean": 0.003040881175547838, "signal/frontier_aurc_reward/group_bin_occupancy": 0.684375, "signal/frontier_aurc_reward/group_std_mean": 0.004443522915244103, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.801101702265442e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.801101702265442e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.03864669501781463, "signal/frontier_coverage_0/group_bin_occupancy": 0.7611111111111112, "signal/frontier_coverage_0/group_std_mean": 0.05980467274785042, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0004830837191548198, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0004830837191548198, "signal/frontier_coverage_1/centered_abs_mean": 0.03864669501781463, "signal/frontier_coverage_1/group_bin_occupancy": 0.7611111111111112, "signal/frontier_coverage_1/group_std_mean": 0.05980467274785042, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0004830837191548198, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0004830837191548198, "signal/frontier_coverage_10/centered_abs_mean": 0.03864669501781463, "signal/frontier_coverage_10/group_bin_occupancy": 0.7611111111111112, "signal/frontier_coverage_10/group_std_mean": 0.05980467274785042, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0004830837191548198, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0004830837191548198, "signal/frontier_coverage_15/centered_abs_mean": 0.03864669501781463, "signal/frontier_coverage_15/group_bin_occupancy": 0.7611111111111112, "signal/frontier_coverage_15/group_std_mean": 0.05980467274785042, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0004830837191548198, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0004830837191548198, "signal/frontier_coverage_20/centered_abs_mean": 0.03864669501781463, "signal/frontier_coverage_20/group_bin_occupancy": 0.7611111111111112, "signal/frontier_coverage_20/group_std_mean": 0.05980467274785042, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0004830837191548198, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0004830837191548198, "signal/frontier_coverage_25/centered_abs_mean": 0.03864669501781463, "signal/frontier_coverage_25/group_bin_occupancy": 0.7611111111111112, "signal/frontier_coverage_25/group_std_mean": 0.05980467274785042, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0004830837191548198, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0004830837191548198, "signal/frontier_coverage_5/centered_abs_mean": 0.03864669501781463, "signal/frontier_coverage_5/group_bin_occupancy": 0.7611111111111112, "signal/frontier_coverage_5/group_std_mean": 0.05980467274785042, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0004830837191548198, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0004830837191548198, "signal/frontier_ece_reward/centered_abs_mean": 0.11394972950220109, "signal/frontier_ece_reward/group_bin_occupancy": 0.685763888888889, "signal/frontier_ece_reward/group_std_mean": 0.13674613535404206, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011394973285496235, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011394973285496235, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09805433601140975, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25659722222222225, "signal/frontier_entropy_batch_reward/group_std_mean": 0.1901380091905594, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.31666666865348814, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.009805433824658394, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.009805433824658394, "step": 25 }, { "calibration/aurc": 0.3127096016826255, "calibration/batch_distribution_entropy": 0.682729244006534, "calibration/batch_entropy_100bins": 0.4653335228882831, "calibration/batch_entropy_10bins": 0.682729244006534, "calibration/batch_entropy_50bins": 0.5435833807311128, "calibration/batch_uniqueness": 0.7277807574566821, "calibration/buffer_distribution_entropy": 0.44270389836197455, "calibration/buffer_entropy_100bins": 0.43136583308440946, "calibration/buffer_entropy_10bins": 0.44270389836197455, "calibration/buffer_entropy_50bins": 0.5002752341976129, "calibration/confidence_entropy": 0.47803069493853273, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.06774193548387096, "calibration/coverage@25%": 0.16167049483339363, "calibration/coverage@30%": 0.5524753519206546, "calibration/coverage@5%": 0.0, "calibration/ece": 0.1434195157005612, "calibration/mean_confidence": 0.7807785223650668, "calibration/prompt_uniqueness": 0.6348340446173696, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015451388888888907, "completions/max_length": 3997.2, "completions/max_terminated_length": 3997.2, "completions/mean_length": 605.9027099609375, "completions/mean_terminated_length": 615.4102783203125, "completions/min_length": 0.0, "completions/min_terminated_length": 145.8, "epoch": 0.07199910001124986, "grad_norm": 0.000519786321092397, "learning_rate": 3.5714285714285718e-06, "loss": -0.008, "num_tokens": 53496720.0, "reward": 0.8398738265037536, "reward_std": 0.1871805250644684, "rewards/accuracy_reward": 0.598524296283722, "rewards/brier_reward": 0.7149089694023132, "rewards/confidence_uniqueness_reward": 0.7139938831329345, "rewards/format_reward": 0.9826388955116272, "rewards/frontier_aurc_reward": -0.003387345978990197, "rewards/frontier_coverage_0": -0.007904923893511296, "rewards/frontier_coverage_1": -0.007904923893511296, "rewards/frontier_coverage_10": -0.007904923893511296, "rewards/frontier_coverage_15": -0.007904923893511296, "rewards/frontier_coverage_20": -0.007904923893511296, "rewards/frontier_coverage_25": -0.007904923893511296, "rewards/frontier_coverage_5": -0.007904923893511296, "rewards/frontier_ece_reward": 0.0140090461820364, "rewards/frontier_entropy_batch_reward": -0.9426495194435119, "signal/accuracy_reward/centered_abs_mean": 0.23240560591220855, "signal/accuracy_reward/group_bin_occupancy": 0.22430555555555554, "signal/accuracy_reward/group_std_mean": 0.29407615661621095, "signal/accuracy_reward/group_zero_std_frac": 0.20555555820465088, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11620280295610427, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11620280295610427, "signal/advantage_abs_mean": 0.1434020832180977, "signal/advantage_pre_scale_abs_mean": 0.1434020832180977, "signal/advantage_pre_scale_std": 0.2102207988500595, "signal/advantage_std": 0.2102207988500595, "signal/brier_reward/centered_abs_mean": 0.16666824817657472, "signal/brier_reward/group_bin_occupancy": 0.7510416666666667, "signal/brier_reward/group_std_mean": 0.2106872409582138, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016666825301945208, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016666825301945208, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.10946927219629288, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6517361111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.13697669506072999, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010946927219629287, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010946927219629287, "signal/format_reward/centered_abs_mean": 0.02850477434694767, "signal/format_reward/group_bin_occupancy": 0.15138888888888888, "signal/format_reward/group_std_mean": 0.05226071253418922, "signal/format_reward/group_zero_std_frac": 0.7888888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014252387173473834, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014252387173473834, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018850252265110612, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7041666666666667, "signal/frontier_aurc_reward/group_std_mean": 0.002827100735157728, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.356281656830106e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.356281656830106e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.05347522720694542, "signal/frontier_coverage_0/group_bin_occupancy": 0.8100694444444445, "signal/frontier_coverage_0/group_std_mean": 0.07446252554655075, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00066844035172835, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00066844035172835, "signal/frontier_coverage_1/centered_abs_mean": 0.05347522720694542, "signal/frontier_coverage_1/group_bin_occupancy": 0.8100694444444445, "signal/frontier_coverage_1/group_std_mean": 0.07446252554655075, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00066844035172835, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00066844035172835, "signal/frontier_coverage_10/centered_abs_mean": 0.05347522720694542, "signal/frontier_coverage_10/group_bin_occupancy": 0.8100694444444445, "signal/frontier_coverage_10/group_std_mean": 0.07446252554655075, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00066844035172835, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00066844035172835, "signal/frontier_coverage_15/centered_abs_mean": 0.05347522720694542, "signal/frontier_coverage_15/group_bin_occupancy": 0.8100694444444445, "signal/frontier_coverage_15/group_std_mean": 0.07446252554655075, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00066844035172835, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00066844035172835, "signal/frontier_coverage_20/centered_abs_mean": 0.05347522720694542, "signal/frontier_coverage_20/group_bin_occupancy": 0.8100694444444445, "signal/frontier_coverage_20/group_std_mean": 0.07446252554655075, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00066844035172835, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00066844035172835, "signal/frontier_coverage_25/centered_abs_mean": 0.05347522720694542, "signal/frontier_coverage_25/group_bin_occupancy": 0.8100694444444445, "signal/frontier_coverage_25/group_std_mean": 0.07446252554655075, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00066844035172835, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00066844035172835, "signal/frontier_coverage_5/centered_abs_mean": 0.05347522720694542, "signal/frontier_coverage_5/group_bin_occupancy": 0.8100694444444445, "signal/frontier_coverage_5/group_std_mean": 0.07446252554655075, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00066844035172835, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00066844035172835, "signal/frontier_ece_reward/centered_abs_mean": 0.06974484175443649, "signal/frontier_ece_reward/group_bin_occupancy": 0.726388888888889, "signal/frontier_ece_reward/group_std_mean": 0.08952345997095108, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006974484585225582, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006974484585225582, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09957558512687684, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25381944444444443, "signal/frontier_entropy_batch_reward/group_std_mean": 0.19237028956413268, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.31944444179534914, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.00995755884796381, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.00995755884796381, "step": 30 }, { "calibration/aurc": 0.2571704237558802, "calibration/batch_distribution_entropy": 0.7051875161366095, "calibration/batch_entropy_100bins": 0.48735346857362777, "calibration/batch_entropy_10bins": 0.7051875161366095, "calibration/batch_entropy_50bins": 0.5711749632836897, "calibration/batch_uniqueness": 0.7382661567514962, "calibration/buffer_distribution_entropy": 0.5312403517873816, "calibration/buffer_entropy_100bins": 0.46550454069451846, "calibration/buffer_entropy_10bins": 0.5312403517873816, "calibration/buffer_entropy_50bins": 0.5407207375942191, "calibration/confidence_entropy": 0.5339174803962459, "calibration/coverage@0%": 0.009015236807393835, "calibration/coverage@1%": 0.009015236807393835, "calibration/coverage@10%": 0.06192117185778577, "calibration/coverage@15%": 0.12324599591726784, "calibration/coverage@20%": 0.20330160686285859, "calibration/coverage@25%": 0.5564677729596665, "calibration/coverage@30%": 0.8, "calibration/coverage@5%": 0.027962605228446468, "calibration/ece": 0.11199284756638057, "calibration/mean_confidence": 0.7308071025680081, "calibration/prompt_uniqueness": 0.6523236468285705, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021614583333333326, "completions/max_length": 3965.0, "completions/max_terminated_length": 3965.0, "completions/mean_length": 644.9698852539062, "completions/mean_terminated_length": 659.2716430664062, "completions/min_length": 0.0, "completions/min_terminated_length": 206.2, "epoch": 0.08399895001312484, "grad_norm": 0.0004933425807394087, "learning_rate": 4.166666666666667e-06, "loss": -0.0138, "num_tokens": 64004213.0, "reward": 0.8588806509971618, "reward_std": 0.17558546364307404, "rewards/accuracy_reward": 0.6338541626930236, "rewards/brier_reward": 0.7461713314056396, "rewards/confidence_uniqueness_reward": 0.7108201503753662, "rewards/format_reward": 0.9765625, "rewards/frontier_aurc_reward": -0.0027221166528761387, "rewards/frontier_coverage_0": -0.01736396786291152, "rewards/frontier_coverage_1": -0.01736396786291152, "rewards/frontier_coverage_10": -0.01736396786291152, "rewards/frontier_coverage_15": -0.01736396786291152, "rewards/frontier_coverage_20": -0.01736396786291152, "rewards/frontier_coverage_25": -0.01736396786291152, "rewards/frontier_coverage_5": -0.01736396786291152, "rewards/frontier_ece_reward": 0.013199667818844319, "rewards/frontier_entropy_batch_reward": -0.9179341077804566, "signal/accuracy_reward/centered_abs_mean": 0.20397135317325593, "signal/accuracy_reward/group_bin_occupancy": 0.21909722222222222, "signal/accuracy_reward/group_std_mean": 0.26651409566402434, "signal/accuracy_reward/group_zero_std_frac": 0.24722222089767457, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10198567658662797, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10198567658662797, "signal/advantage_abs_mean": 0.1289672315120697, "signal/advantage_pre_scale_abs_mean": 0.1289672315120697, "signal/advantage_pre_scale_std": 0.19859465956687927, "signal/advantage_std": 0.19859465956687927, "signal/brier_reward/centered_abs_mean": 0.1401739925146103, "signal/brier_reward/group_bin_occupancy": 0.7861111111111111, "signal/brier_reward/group_std_mean": 0.18215077519416809, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014017399214208127, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014017399214208127, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.11878160536289215, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6989583333333333, "signal/confidence_uniqueness_reward/group_std_mean": 0.14878978729248046, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011878160759806633, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011878160759806633, "signal/format_reward/centered_abs_mean": 0.03610026016831398, "signal/format_reward/group_bin_occupancy": 0.15694444444444444, "signal/format_reward/group_std_mean": 0.06481491774320602, "signal/format_reward/group_zero_std_frac": 0.7444444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01805013008415699, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01805013008415699, "signal/frontier_aurc_reward/centered_abs_mean": 0.001359763811342418, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7402777777777778, "signal/frontier_aurc_reward/group_std_mean": 0.0021033880300819876, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6997047714539805e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6997047714539805e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.06835834383964538, "signal/frontier_coverage_0/group_bin_occupancy": 0.8423611111111111, "signal/frontier_coverage_0/group_std_mean": 0.09002460837364197, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0008544792886823416, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0008544792886823416, "signal/frontier_coverage_1/centered_abs_mean": 0.06835834383964538, "signal/frontier_coverage_1/group_bin_occupancy": 0.8423611111111111, "signal/frontier_coverage_1/group_std_mean": 0.09002460837364197, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0008544792886823416, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0008544792886823416, "signal/frontier_coverage_10/centered_abs_mean": 0.06835834383964538, "signal/frontier_coverage_10/group_bin_occupancy": 0.8423611111111111, "signal/frontier_coverage_10/group_std_mean": 0.09002460837364197, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0008544792886823416, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008544792886823416, "signal/frontier_coverage_15/centered_abs_mean": 0.06835834383964538, "signal/frontier_coverage_15/group_bin_occupancy": 0.8423611111111111, "signal/frontier_coverage_15/group_std_mean": 0.09002460837364197, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008544792886823416, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008544792886823416, "signal/frontier_coverage_20/centered_abs_mean": 0.06835834383964538, "signal/frontier_coverage_20/group_bin_occupancy": 0.8423611111111111, "signal/frontier_coverage_20/group_std_mean": 0.09002460837364197, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008544792886823416, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008544792886823416, "signal/frontier_coverage_25/centered_abs_mean": 0.06835834383964538, "signal/frontier_coverage_25/group_bin_occupancy": 0.8423611111111111, "signal/frontier_coverage_25/group_std_mean": 0.09002460837364197, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008544792886823416, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008544792886823416, "signal/frontier_coverage_5/centered_abs_mean": 0.06835834383964538, "signal/frontier_coverage_5/group_bin_occupancy": 0.8423611111111111, "signal/frontier_coverage_5/group_std_mean": 0.09002460837364197, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0008544792886823416, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0008544792886823416, "signal/frontier_ece_reward/centered_abs_mean": 0.0456878550350666, "signal/frontier_ece_reward/group_bin_occupancy": 0.726736111111111, "signal/frontier_ece_reward/group_std_mean": 0.06319972574710846, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004568785382434726, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004568785382434726, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.13658613115549087, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3038194444444444, "signal/frontier_entropy_batch_reward/group_std_mean": 0.24717094898223876, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.17777777910232545, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013658612966537476, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013658612966537476, "step": 35 }, { "calibration/aurc": 0.3090385450407646, "calibration/batch_distribution_entropy": 0.7382054017370615, "calibration/batch_entropy_100bins": 0.6274540890939544, "calibration/batch_entropy_10bins": 0.7382054017370615, "calibration/batch_entropy_50bins": 0.6888115588991056, "calibration/batch_uniqueness": 0.8256191615498129, "calibration/buffer_distribution_entropy": 0.5895912459678766, "calibration/buffer_entropy_100bins": 0.4977340263064625, "calibration/buffer_entropy_10bins": 0.5895912459678766, "calibration/buffer_entropy_50bins": 0.5756374115825056, "calibration/confidence_entropy": 0.5221476459958998, "calibration/coverage@0%": 0.002116402116402116, "calibration/coverage@1%": 0.002116402116402116, "calibration/coverage@10%": 0.06582397391796871, "calibration/coverage@15%": 0.07211101440866455, "calibration/coverage@20%": 0.11545305096218934, "calibration/coverage@25%": 0.2742136018898421, "calibration/coverage@30%": 0.3149446723859256, "calibration/coverage@5%": 0.014649039192120131, "calibration/ece": 0.11780196610367691, "calibration/mean_confidence": 0.7270655645788431, "calibration/prompt_uniqueness": 0.761317842320403, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017881944444444443, "completions/max_length": 3955.8, "completions/max_terminated_length": 3955.8, "completions/mean_length": 685.938818359375, "completions/mean_terminated_length": 698.407666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 226.6, "epoch": 0.09599880001499982, "grad_norm": 0.00048642107867635787, "learning_rate": 4.761904761904762e-06, "loss": -0.0134, "num_tokens": 75025748.0, "reward": 0.8857535004615784, "reward_std": 0.178457173705101, "rewards/accuracy_reward": 0.6368923425674439, "rewards/brier_reward": 0.7461103081703186, "rewards/confidence_uniqueness_reward": 0.8087468624114991, "rewards/format_reward": 0.9810763955116272, "rewards/frontier_aurc_reward": -0.0026570867747068407, "rewards/frontier_coverage_0": -0.019249437330290674, "rewards/frontier_coverage_1": -0.019249437330290674, "rewards/frontier_coverage_10": -0.019249437330290674, "rewards/frontier_coverage_15": -0.019249437330290674, "rewards/frontier_coverage_20": -0.019249437330290674, "rewards/frontier_coverage_25": -0.019249437330290674, "rewards/frontier_coverage_5": -0.019249437330290674, "rewards/frontier_ece_reward": 0.008992346841841936, "rewards/frontier_entropy_batch_reward": -0.7789829492568969, "signal/accuracy_reward/centered_abs_mean": 0.20183919072151185, "signal/accuracy_reward/group_bin_occupancy": 0.21909722222222222, "signal/accuracy_reward/group_std_mean": 0.26670118868350984, "signal/accuracy_reward/group_zero_std_frac": 0.2472222238779068, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10091959536075593, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10091959536075593, "signal/advantage_abs_mean": 0.13196419775485993, "signal/advantage_pre_scale_abs_mean": 0.13196419775485993, "signal/advantage_pre_scale_std": 0.20010344088077545, "signal/advantage_std": 0.20010344088077545, "signal/brier_reward/centered_abs_mean": 0.14929873943328859, "signal/brier_reward/group_bin_occupancy": 0.8149305555555555, "signal/brier_reward/group_std_mean": 0.19311635494232177, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014929874055087566, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014929874055087566, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.10491774380207061, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6815972222222222, "signal/confidence_uniqueness_reward/group_std_mean": 0.13206958025693893, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010491774696856738, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010491774696856738, "signal/format_reward/centered_abs_mean": 0.03161892369389534, "signal/format_reward/group_bin_occupancy": 0.15243055555555557, "signal/format_reward/group_std_mean": 0.0556372843682766, "signal/format_reward/group_zero_std_frac": 0.7805555582046508, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01580946184694767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01580946184694767, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018603557720780373, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7145833333333333, "signal/frontier_aurc_reward/group_std_mean": 0.002942401263862848, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.325444802409038e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.325444802409038e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.08571741878986358, "signal/frontier_coverage_0/group_bin_occupancy": 0.8364583333333334, "signal/frontier_coverage_0/group_std_mean": 0.11739055812358856, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0010714677278883754, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0010714677278883754, "signal/frontier_coverage_1/centered_abs_mean": 0.08571741878986358, "signal/frontier_coverage_1/group_bin_occupancy": 0.8364583333333334, "signal/frontier_coverage_1/group_std_mean": 0.11739055812358856, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0010714677278883754, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0010714677278883754, "signal/frontier_coverage_10/centered_abs_mean": 0.08571741878986358, "signal/frontier_coverage_10/group_bin_occupancy": 0.8364583333333334, "signal/frontier_coverage_10/group_std_mean": 0.11739055812358856, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0010714677278883754, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010714677278883754, "signal/frontier_coverage_15/centered_abs_mean": 0.08571741878986358, "signal/frontier_coverage_15/group_bin_occupancy": 0.8364583333333334, "signal/frontier_coverage_15/group_std_mean": 0.11739055812358856, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010714677278883754, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010714677278883754, "signal/frontier_coverage_20/centered_abs_mean": 0.08571741878986358, "signal/frontier_coverage_20/group_bin_occupancy": 0.8364583333333334, "signal/frontier_coverage_20/group_std_mean": 0.11739055812358856, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010714677278883754, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010714677278883754, "signal/frontier_coverage_25/centered_abs_mean": 0.08571741878986358, "signal/frontier_coverage_25/group_bin_occupancy": 0.8364583333333334, "signal/frontier_coverage_25/group_std_mean": 0.11739055812358856, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010714677278883754, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010714677278883754, "signal/frontier_coverage_5/centered_abs_mean": 0.08571741878986358, "signal/frontier_coverage_5/group_bin_occupancy": 0.8364583333333334, "signal/frontier_coverage_5/group_std_mean": 0.11739055812358856, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0010714677278883754, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0010714677278883754, "signal/frontier_ece_reward/centered_abs_mean": 0.0511918880045414, "signal/frontier_ece_reward/group_bin_occupancy": 0.7284722222222222, "signal/frontier_ece_reward/group_std_mean": 0.08025645166635513, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005119189154356718, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005119189154356718, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3113617360591888, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5229166666666666, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4173878490924835, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.01388888917863369, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031136173382401468, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031136173382401468, "step": 40 }, { "calibration/aurc": 0.22827400940686599, "calibration/batch_distribution_entropy": 0.9071498888134337, "calibration/batch_entropy_100bins": 0.8968223970662214, "calibration/batch_entropy_10bins": 0.9071498888134337, "calibration/batch_entropy_50bins": 0.9171079157181918, "calibration/batch_uniqueness": 0.9456390260906643, "calibration/buffer_distribution_entropy": 0.6393650601547213, "calibration/buffer_entropy_100bins": 0.5675126064584276, "calibration/buffer_entropy_10bins": 0.6393650601547213, "calibration/buffer_entropy_50bins": 0.6379876044842737, "calibration/confidence_entropy": 0.5032381198722609, "calibration/coverage@0%": 0.014736842105263156, "calibration/coverage@1%": 0.014736842105263156, "calibration/coverage@10%": 0.055916127216436816, "calibration/coverage@15%": 0.10939021240009408, "calibration/coverage@20%": 0.30897909044000377, "calibration/coverage@25%": 0.6747945574902163, "calibration/coverage@30%": 0.9791666666666666, "calibration/coverage@5%": 0.034210526315789476, "calibration/ece": 0.16323481632764078, "calibration/mean_confidence": 0.6319245579648017, "calibration/prompt_uniqueness": 0.879434752960265, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02144097222222221, "completions/max_length": 3757.8, "completions/max_terminated_length": 3757.8, "completions/mean_length": 724.39765625, "completions/mean_terminated_length": 740.4448852539062, "completions/min_length": 0.0, "completions/min_terminated_length": 246.0, "epoch": 0.1079986500168748, "grad_norm": 0.0004063255328219384, "learning_rate": 4.909638554216868e-06, "loss": -0.0167, "num_tokens": 86506073.0, "reward": 0.9310973882675171, "reward_std": 0.17580304443836212, "rewards/accuracy_reward": 0.6424479246139526, "rewards/brier_reward": 0.7319015741348267, "rewards/confidence_uniqueness_reward": 0.9265702605247498, "rewards/format_reward": 0.9777777791023254, "rewards/frontier_aurc_reward": -0.002285001240670681, "rewards/frontier_coverage_0": -0.033109604939818384, "rewards/frontier_coverage_1": -0.033109604939818384, "rewards/frontier_coverage_10": -0.033109604939818384, "rewards/frontier_coverage_15": -0.033109604939818384, "rewards/frontier_coverage_20": -0.033109604939818384, "rewards/frontier_coverage_25": -0.033109604939818384, "rewards/frontier_coverage_5": -0.033109604939818384, "rewards/frontier_ece_reward": -0.006580299325287342, "rewards/frontier_entropy_batch_reward": -0.41278970837593076, "signal/accuracy_reward/centered_abs_mean": 0.19691297709941863, "signal/accuracy_reward/group_bin_occupancy": 0.21909722222222222, "signal/accuracy_reward/group_std_mean": 0.2627987444400787, "signal/accuracy_reward/group_zero_std_frac": 0.2472222238779068, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09845648854970931, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09845648854970931, "signal/advantage_abs_mean": 0.13047890067100526, "signal/advantage_pre_scale_abs_mean": 0.13047890067100526, "signal/advantage_pre_scale_std": 0.19528700709342955, "signal/advantage_std": 0.19528700709342955, "signal/brier_reward/centered_abs_mean": 0.19260537028312683, "signal/brier_reward/group_bin_occupancy": 0.8631944444444445, "signal/brier_reward/group_std_mean": 0.24576567709445954, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01926053762435913, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01926053762435913, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04959237203001976, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7625, "signal/confidence_uniqueness_reward/group_std_mean": 0.08034891486167908, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0049592372961342335, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0049592372961342335, "signal/format_reward/centered_abs_mean": 0.03552517332136631, "signal/format_reward/group_bin_occupancy": 0.15694444444444444, "signal/format_reward/group_std_mean": 0.06437588557600975, "signal/format_reward/group_zero_std_frac": 0.7444444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.017762586660683154, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017762586660683154, "signal/frontier_aurc_reward/centered_abs_mean": 0.001982824504375458, "signal/frontier_aurc_reward/group_bin_occupancy": 0.670138888888889, "signal/frontier_aurc_reward/group_std_mean": 0.0033466981258243322, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.478530877851881e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.478530877851881e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18281554132699968, "signal/frontier_coverage_0/group_bin_occupancy": 0.867013888888889, "signal/frontier_coverage_0/group_std_mean": 0.24726351499557495, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002285194396972656, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002285194396972656, "signal/frontier_coverage_1/centered_abs_mean": 0.18281554132699968, "signal/frontier_coverage_1/group_bin_occupancy": 0.867013888888889, "signal/frontier_coverage_1/group_std_mean": 0.24726351499557495, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002285194396972656, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002285194396972656, "signal/frontier_coverage_10/centered_abs_mean": 0.18281554132699968, "signal/frontier_coverage_10/group_bin_occupancy": 0.867013888888889, "signal/frontier_coverage_10/group_std_mean": 0.24726351499557495, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002285194396972656, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002285194396972656, "signal/frontier_coverage_15/centered_abs_mean": 0.18281554132699968, "signal/frontier_coverage_15/group_bin_occupancy": 0.867013888888889, "signal/frontier_coverage_15/group_std_mean": 0.24726351499557495, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002285194396972656, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002285194396972656, "signal/frontier_coverage_20/centered_abs_mean": 0.18281554132699968, "signal/frontier_coverage_20/group_bin_occupancy": 0.867013888888889, "signal/frontier_coverage_20/group_std_mean": 0.24726351499557495, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002285194396972656, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002285194396972656, "signal/frontier_coverage_25/centered_abs_mean": 0.18281554132699968, "signal/frontier_coverage_25/group_bin_occupancy": 0.867013888888889, "signal/frontier_coverage_25/group_std_mean": 0.24726351499557495, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002285194396972656, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002285194396972656, "signal/frontier_coverage_5/centered_abs_mean": 0.18281554132699968, "signal/frontier_coverage_5/group_bin_occupancy": 0.867013888888889, "signal/frontier_coverage_5/group_std_mean": 0.24726351499557495, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002285194396972656, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002285194396972656, "signal/frontier_ece_reward/centered_abs_mean": 0.07174393385648728, "signal/frontier_ece_reward/group_bin_occupancy": 0.7809027777777777, "signal/frontier_ece_reward/group_std_mean": 0.11735818088054657, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007174393441528082, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007174393441528082, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.40669200420379636, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.775, "signal/frontier_entropy_batch_reward/group_std_mean": 0.46814131140708926, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04066920205950737, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04066920205950737, "step": 45 }, { "calibration/aurc": 0.39852565881023116, "calibration/batch_distribution_entropy": 0.9787449958619818, "calibration/batch_entropy_100bins": 0.9592369510418349, "calibration/batch_entropy_10bins": 0.9787449958619818, "calibration/batch_entropy_50bins": 0.971046332337011, "calibration/batch_uniqueness": 0.9525960238680078, "calibration/buffer_distribution_entropy": 0.7212969635124125, "calibration/buffer_entropy_100bins": 0.6553290358103675, "calibration/buffer_entropy_10bins": 0.7212969635124125, "calibration/buffer_entropy_50bins": 0.7162547318725517, "calibration/confidence_entropy": 0.49994193937653375, "calibration/coverage@0%": 0.0005235602094240838, "calibration/coverage@1%": 0.0005235602094240838, "calibration/coverage@10%": 0.011218747375199485, "calibration/coverage@15%": 0.011753506733488257, "calibration/coverage@20%": 0.032012193497512484, "calibration/coverage@25%": 0.06712624280218765, "calibration/coverage@30%": 0.13888077152042816, "calibration/coverage@5%": 0.0005235602094240838, "calibration/ece": 0.20714599782061244, "calibration/mean_confidence": 0.5138718686696668, "calibration/prompt_uniqueness": 0.8887597240995356, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021180555555555557, "completions/max_length": 3352.2, "completions/max_terminated_length": 3352.2, "completions/mean_length": 729.52119140625, "completions/mean_terminated_length": 745.1978759765625, "completions/min_length": 0.0, "completions/min_terminated_length": 227.8, "epoch": 0.11999850001874976, "grad_norm": 0.0005338288610801101, "learning_rate": 4.759036144578314e-06, "loss": -0.0177, "num_tokens": 98007757.0, "reward": 0.9368434190750122, "reward_std": 0.16865328848361968, "rewards/accuracy_reward": 0.6310763955116272, "rewards/brier_reward": 0.69019033908844, "rewards/confidence_uniqueness_reward": 0.9318178653717041, "rewards/format_reward": 0.9782118082046509, "rewards/frontier_aurc_reward": -0.002340958220884204, "rewards/frontier_coverage_0": -0.055156460218131545, "rewards/frontier_coverage_1": -0.055156460218131545, "rewards/frontier_coverage_10": -0.055156460218131545, "rewards/frontier_coverage_15": -0.055156460218131545, "rewards/frontier_coverage_20": -0.055156460218131545, "rewards/frontier_coverage_25": -0.055156460218131545, "rewards/frontier_coverage_5": -0.055156460218131545, "rewards/frontier_ece_reward": -0.008883633697405458, "rewards/frontier_entropy_batch_reward": -0.24257669150829314, "signal/accuracy_reward/centered_abs_mean": 0.1930555522441864, "signal/accuracy_reward/group_bin_occupancy": 0.215625, "signal/accuracy_reward/group_std_mean": 0.2544387519359589, "signal/accuracy_reward/group_zero_std_frac": 0.27499999701976774, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0965277761220932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0965277761220932, "signal/advantage_abs_mean": 0.1274535983800888, "signal/advantage_pre_scale_abs_mean": 0.1274535983800888, "signal/advantage_pre_scale_std": 0.18669797778129577, "signal/advantage_std": 0.18669797778129577, "signal/brier_reward/centered_abs_mean": 0.23190079629421234, "signal/brier_reward/group_bin_occupancy": 0.921875, "signal/brier_reward/group_std_mean": 0.2818805932998657, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023190080001950265, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.023190080001950265, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04411279484629631, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7649305555555556, "signal/confidence_uniqueness_reward/group_std_mean": 0.07408891320228576, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0044112796895205975, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0044112796895205975, "signal/format_reward/centered_abs_mean": 0.035107421875, "signal/format_reward/group_bin_occupancy": 0.15694444444444444, "signal/format_reward/group_std_mean": 0.06355542615056038, "signal/format_reward/group_zero_std_frac": 0.7444444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0175537109375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0175537109375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018331629456952215, "signal/frontier_aurc_reward/group_bin_occupancy": 0.646875, "signal/frontier_aurc_reward/group_std_mean": 0.003077285923063755, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2914535657037048e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2914535657037048e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2667407602071762, "signal/frontier_coverage_0/group_bin_occupancy": 0.9173611111111111, "signal/frontier_coverage_0/group_std_mean": 0.33869033455848696, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0033342594746500255, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0033342594746500255, "signal/frontier_coverage_1/centered_abs_mean": 0.2667407602071762, "signal/frontier_coverage_1/group_bin_occupancy": 0.9173611111111111, "signal/frontier_coverage_1/group_std_mean": 0.33869033455848696, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033342594746500255, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033342594746500255, "signal/frontier_coverage_10/centered_abs_mean": 0.2667407602071762, "signal/frontier_coverage_10/group_bin_occupancy": 0.9173611111111111, "signal/frontier_coverage_10/group_std_mean": 0.33869033455848696, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033342594746500255, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033342594746500255, "signal/frontier_coverage_15/centered_abs_mean": 0.2667407602071762, "signal/frontier_coverage_15/group_bin_occupancy": 0.9173611111111111, "signal/frontier_coverage_15/group_std_mean": 0.33869033455848696, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033342594746500255, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033342594746500255, "signal/frontier_coverage_20/centered_abs_mean": 0.2667407602071762, "signal/frontier_coverage_20/group_bin_occupancy": 0.9173611111111111, "signal/frontier_coverage_20/group_std_mean": 0.33869033455848696, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033342594746500255, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033342594746500255, "signal/frontier_coverage_25/centered_abs_mean": 0.2667407602071762, "signal/frontier_coverage_25/group_bin_occupancy": 0.9173611111111111, "signal/frontier_coverage_25/group_std_mean": 0.33869033455848696, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0033342594746500255, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033342594746500255, "signal/frontier_coverage_5/centered_abs_mean": 0.2667407602071762, "signal/frontier_coverage_5/group_bin_occupancy": 0.9173611111111111, "signal/frontier_coverage_5/group_std_mean": 0.33869033455848696, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033342594746500255, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033342594746500255, "signal/frontier_ece_reward/centered_abs_mean": 0.07847718596458435, "signal/frontier_ece_reward/group_bin_occupancy": 0.871875, "signal/frontier_ece_reward/group_std_mean": 0.11430413126945496, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007847718894481659, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007847718894481659, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33370028138160707, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7784722222222222, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40720880031585693, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03337002918124199, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03337002918124199, "step": 50 }, { "epoch": 0.11999850001874976, "eval_calibration/aurc": 0.22663103748517832, "eval_calibration/batch_distribution_entropy": 0.8944488094393327, "eval_calibration/batch_entropy_100bins": 0.7018959966227308, "eval_calibration/batch_entropy_10bins": 0.8944488094393327, "eval_calibration/batch_entropy_50bins": 0.7678897018331208, "eval_calibration/batch_uniqueness": 0.8925259603711412, "eval_calibration/buffer_distribution_entropy": 0.7581864782785342, "eval_calibration/buffer_entropy_100bins": 0.697479079611122, "eval_calibration/buffer_entropy_10bins": 0.7581864782785342, "eval_calibration/buffer_entropy_50bins": 0.7531417625731921, "eval_calibration/confidence_entropy": 0.49303684207606624, "eval_calibration/coverage@0%": 0.17893145161290322, "eval_calibration/coverage@1%": 0.17893145161290322, "eval_calibration/coverage@10%": 0.23168682795698925, "eval_calibration/coverage@15%": 0.29939516129032256, "eval_calibration/coverage@20%": 0.49227150537634407, "eval_calibration/coverage@25%": 0.7928427419354839, "eval_calibration/coverage@30%": 0.8991935483870966, "eval_calibration/coverage@5%": 0.17893145161290322, "eval_calibration/ece": 0.2323555386214126, "eval_calibration/mean_confidence": 0.6104373585295977, "eval_calibration/prompt_uniqueness": 0.8925259603711412, "eval_completions/clipped_ratio": 0.022395833333333354, "eval_completions/max_length": 1943.6666666666667, "eval_completions/max_terminated_length": 1943.6666666666667, "eval_completions/mean_length": 708.0866495768229, "eval_completions/mean_terminated_length": 724.3104654947916, "eval_completions/min_length": 0.0, "eval_completions/min_terminated_length": 297.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 98007757.0, "eval_reward": 0.8659678896268209, "eval_reward_std": 0.26238663494586945, "eval_rewards/accuracy_reward": 0.6319444477558136, "eval_rewards/brier_reward": 0.736727903286616, "eval_rewards/confidence_uniqueness_reward": 0.8684482177098592, "eval_rewards/format_reward": 0.975694457689921, "eval_rewards/frontier_aurc_reward": -0.0022891214466653764, "eval_rewards/frontier_coverage_0": -0.01684247803253432, "eval_rewards/frontier_coverage_1": -0.01684247803253432, "eval_rewards/frontier_coverage_10": -0.01684247803253432, "eval_rewards/frontier_coverage_15": -0.01684247803253432, "eval_rewards/frontier_coverage_20": -0.01684247803253432, "eval_rewards/frontier_coverage_25": -0.01684247803253432, "eval_rewards/frontier_coverage_5": -0.01684247803253432, "eval_rewards/frontier_ece_reward": 0.00702586160817494, "eval_rewards/frontier_entropy_batch_reward": -0.975694457689921, "eval_runtime": 211.0238, "eval_samples_per_second": 4.739, "eval_signal/accuracy_reward/centered_abs_mean": 0.4508463541666667, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.48167944451173145, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22542317708333334, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22542317708333334, "eval_signal/advantage_abs_mean": 0.22892138113578162, "eval_signal/advantage_pre_scale_abs_mean": 0.22892138113578162, "eval_signal/advantage_pre_scale_std": 0.2612771863738696, "eval_signal/advantage_std": 0.2612771863738696, "eval_signal/brier_reward/centered_abs_mean": 0.22721747557322183, "eval_signal/brier_reward/group_bin_occupancy": 0.923611111111111, "eval_signal/brier_reward/group_std_mean": 0.2780616382757823, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0227217481782039, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.0227217481782039, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06945328476528327, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3958333333333333, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1254055512448152, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006945328554138541, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006945328554138541, "eval_signal/format_reward/centered_abs_mean": 0.04589843765522043, "eval_signal/format_reward/group_bin_occupancy": 0.19097222222222224, "eval_signal/format_reward/group_std_mean": 0.10953563420722882, "eval_signal/format_reward/group_zero_std_frac": 0.4722222338120143, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.022949218827610213, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.022949218827610213, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0023004660033620894, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6736111111111112, "eval_signal/frontier_aurc_reward/group_std_mean": 0.004148481646552682, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8755823829366516e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8755823829366516e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.22893314063549042, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.951388888888889, "eval_signal/frontier_coverage_0/group_std_mean": 0.3282311459382375, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0028616644364471235, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028616644364471235, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.22893314063549042, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.951388888888889, "eval_signal/frontier_coverage_1/group_std_mean": 0.3282311459382375, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028616644364471235, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028616644364471235, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.22893314063549042, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.951388888888889, "eval_signal/frontier_coverage_10/group_std_mean": 0.3282311459382375, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028616644364471235, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028616644364471235, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.22893314063549042, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.951388888888889, "eval_signal/frontier_coverage_15/group_std_mean": 0.3282311459382375, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028616644364471235, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028616644364471235, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.22893314063549042, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.951388888888889, "eval_signal/frontier_coverage_20/group_std_mean": 0.3282311459382375, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028616644364471235, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028616644364471235, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.22893314063549042, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.951388888888889, "eval_signal/frontier_coverage_25/group_std_mean": 0.3282311459382375, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028616644364471235, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028616644364471235, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.22893314063549042, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.951388888888889, "eval_signal/frontier_coverage_5/group_std_mean": 0.3282311459382375, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028616644364471235, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028616644364471235, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.05358493266006311, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8263888888888888, "eval_signal/frontier_ece_reward/group_std_mean": 0.08428221692641576, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005358493188396096, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005358493188396096, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.04589843765522043, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.19097222222222224, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.10953563420722882, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.4722222338120143, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.004589843874176343, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.004589843874176343, "eval_steps_per_second": 0.028, "step": 50 }, { "calibration/aurc": 0.2683659949197693, "calibration/batch_distribution_entropy": 0.9526285003912491, "calibration/batch_entropy_100bins": 0.9450206874209816, "calibration/batch_entropy_10bins": 0.9526285003912491, "calibration/batch_entropy_50bins": 0.9561908449747939, "calibration/batch_uniqueness": 0.9479367620902497, "calibration/buffer_distribution_entropy": 0.77331458113982, "calibration/buffer_entropy_100bins": 0.7195050284353803, "calibration/buffer_entropy_10bins": 0.77331458113982, "calibration/buffer_entropy_50bins": 0.7715357463295008, "calibration/confidence_entropy": 0.5168335151397986, "calibration/coverage@0%": 0.019592974315080607, "calibration/coverage@1%": 0.019592974315080607, "calibration/coverage@10%": 0.033857972511265, "calibration/coverage@15%": 0.07343580892287452, "calibration/coverage@20%": 0.34841750738119537, "calibration/coverage@25%": 0.5284259703915656, "calibration/coverage@30%": 0.6675125886120853, "calibration/coverage@5%": 0.028036246082890633, "calibration/ece": 0.17036230442798692, "calibration/mean_confidence": 0.6091539475261136, "calibration/prompt_uniqueness": 0.8819837383869557, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021354166666666674, "completions/max_length": 3492.2, "completions/max_terminated_length": 3492.2, "completions/mean_length": 733.5085205078125, "completions/mean_terminated_length": 749.6647705078125, "completions/min_length": 0.0, "completions/min_terminated_length": 229.8, "epoch": 0.13199835002062474, "grad_norm": 0.0005942412535659969, "learning_rate": 4.60843373493976e-06, "loss": -0.0179, "num_tokens": 109538351.0, "reward": 0.9447301387786865, "reward_std": 0.1707069009542465, "rewards/accuracy_reward": 0.64296875, "rewards/brier_reward": 0.728516948223114, "rewards/confidence_uniqueness_reward": 0.9280948400497436, "rewards/format_reward": 0.9776041626930236, "rewards/frontier_aurc_reward": -0.002217937121167779, "rewards/frontier_coverage_0": -0.0328390815295279, "rewards/frontier_coverage_1": -0.0328390815295279, "rewards/frontier_coverage_10": -0.0328390815295279, "rewards/frontier_coverage_15": -0.0328390815295279, "rewards/frontier_coverage_20": -0.0328390815295279, "rewards/frontier_coverage_25": -0.0328390815295279, "rewards/frontier_coverage_5": -0.0328390815295279, "rewards/frontier_ece_reward": 0.004757384280674159, "rewards/frontier_entropy_batch_reward": -0.2879209280014038, "signal/accuracy_reward/centered_abs_mean": 0.19408094584941865, "signal/accuracy_reward/group_bin_occupancy": 0.21388888888888893, "signal/accuracy_reward/group_std_mean": 0.2528954565525055, "signal/accuracy_reward/group_zero_std_frac": 0.28888889253139494, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09704047292470933, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09704047292470933, "signal/advantage_abs_mean": 0.12855922281742097, "signal/advantage_pre_scale_abs_mean": 0.12855922281742097, "signal/advantage_pre_scale_std": 0.19316387176513672, "signal/advantage_std": 0.19316387176513672, "signal/brier_reward/centered_abs_mean": 0.20376710891723632, "signal/brier_reward/group_bin_occupancy": 0.9003472222222223, "signal/brier_reward/group_std_mean": 0.2530288904905319, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020376710593700408, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020376710593700408, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.045846784859895705, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7788194444444445, "signal/confidence_uniqueness_reward/group_std_mean": 0.07546174824237824, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004584678448736668, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004584678448736668, "signal/format_reward/centered_abs_mean": 0.03585069477558136, "signal/format_reward/group_bin_occupancy": 0.15659722222222222, "signal/format_reward/group_std_mean": 0.06373232007026672, "signal/format_reward/group_zero_std_frac": 0.7472222328186036, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01792534738779068, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01792534738779068, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020224370528012514, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6652777777777777, "signal/frontier_aurc_reward/group_std_mean": 0.0033281259704381226, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5280463160015642e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5280463160015642e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20073602795600892, "signal/frontier_coverage_0/group_bin_occupancy": 0.8875, "signal/frontier_coverage_0/group_std_mean": 0.26514851450920107, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025092002935707567, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025092002935707567, "signal/frontier_coverage_1/centered_abs_mean": 0.20073602795600892, "signal/frontier_coverage_1/group_bin_occupancy": 0.8875, "signal/frontier_coverage_1/group_std_mean": 0.26514851450920107, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025092002935707567, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025092002935707567, "signal/frontier_coverage_10/centered_abs_mean": 0.20073602795600892, "signal/frontier_coverage_10/group_bin_occupancy": 0.8875, "signal/frontier_coverage_10/group_std_mean": 0.26514851450920107, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025092002935707567, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025092002935707567, "signal/frontier_coverage_15/centered_abs_mean": 0.20073602795600892, "signal/frontier_coverage_15/group_bin_occupancy": 0.8875, "signal/frontier_coverage_15/group_std_mean": 0.26514851450920107, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025092002935707567, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025092002935707567, "signal/frontier_coverage_20/centered_abs_mean": 0.20073602795600892, "signal/frontier_coverage_20/group_bin_occupancy": 0.8875, "signal/frontier_coverage_20/group_std_mean": 0.26514851450920107, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025092002935707567, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025092002935707567, "signal/frontier_coverage_25/centered_abs_mean": 0.20073602795600892, "signal/frontier_coverage_25/group_bin_occupancy": 0.8875, "signal/frontier_coverage_25/group_std_mean": 0.26514851450920107, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025092002935707567, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025092002935707567, "signal/frontier_coverage_5/centered_abs_mean": 0.20073602795600892, "signal/frontier_coverage_5/group_bin_occupancy": 0.8875, "signal/frontier_coverage_5/group_std_mean": 0.26514851450920107, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025092002935707567, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025092002935707567, "signal/frontier_ece_reward/centered_abs_mean": 0.05378806218504906, "signal/frontier_ece_reward/group_bin_occupancy": 0.8496527777777778, "signal/frontier_ece_reward/group_std_mean": 0.08123364597558975, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005378806497901678, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005378806497901678, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3519828081130981, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7847222222222223, "signal/frontier_entropy_batch_reward/group_std_mean": 0.42118882536888125, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035198282450437546, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035198282450437546, "step": 55 }, { "calibration/aurc": 0.34125538207514033, "calibration/batch_distribution_entropy": 0.972611014708105, "calibration/batch_entropy_100bins": 0.9550453951098585, "calibration/batch_entropy_10bins": 0.972611014708105, "calibration/batch_entropy_50bins": 0.9672234348309336, "calibration/batch_uniqueness": 0.9504243423453544, "calibration/buffer_distribution_entropy": 0.8040459125276394, "calibration/buffer_entropy_100bins": 0.7606849562747422, "calibration/buffer_entropy_10bins": 0.8040459125276394, "calibration/buffer_entropy_50bins": 0.8061715790901408, "calibration/confidence_entropy": 0.5298289411990107, "calibration/coverage@0%": 0.0021136100291773113, "calibration/coverage@1%": 0.0021136100291773113, "calibration/coverage@10%": 0.019391096940172076, "calibration/coverage@15%": 0.09173199903161312, "calibration/coverage@20%": 0.25987322029879684, "calibration/coverage@25%": 0.29309997918512537, "calibration/coverage@30%": 0.4806800713830787, "calibration/coverage@5%": 0.0021136100291773113, "calibration/ece": 0.19323905598963226, "calibration/mean_confidence": 0.544135561508949, "calibration/prompt_uniqueness": 0.8866490591558446, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023177083333333348, "completions/max_length": 3648.6, "completions/max_terminated_length": 3648.6, "completions/mean_length": 736.1137329101563, "completions/mean_terminated_length": 753.6991821289063, "completions/min_length": 0.0, "completions/min_terminated_length": 196.8, "epoch": 0.14399820002249972, "grad_norm": 0.00039395506610162556, "learning_rate": 4.457831325301205e-06, "loss": -0.0199, "num_tokens": 121114957.0, "reward": 0.9317705035209656, "reward_std": 0.1709751844406128, "rewards/accuracy_reward": 0.6081597208976746, "rewards/brier_reward": 0.7243224620819092, "rewards/confidence_uniqueness_reward": 0.9290532946586609, "rewards/format_reward": 0.9764756917953491, "rewards/frontier_aurc_reward": -0.0022094239946454765, "rewards/frontier_coverage_0": -0.014207637775689364, "rewards/frontier_coverage_1": -0.014207637775689364, "rewards/frontier_coverage_10": -0.014207637775689364, "rewards/frontier_coverage_15": -0.014207637775689364, "rewards/frontier_coverage_20": -0.014207637775689364, "rewards/frontier_coverage_25": -0.014207637775689364, "rewards/frontier_coverage_5": -0.014207637775689364, "rewards/frontier_ece_reward": 0.005637143552303314, "rewards/frontier_entropy_batch_reward": -0.25177713930606843, "signal/accuracy_reward/centered_abs_mean": 0.20734592378139496, "signal/accuracy_reward/group_bin_occupancy": 0.2170138888888889, "signal/accuracy_reward/group_std_mean": 0.26776798665523527, "signal/accuracy_reward/group_zero_std_frac": 0.26388888657093046, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10367296189069748, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10367296189069748, "signal/advantage_abs_mean": 0.12827864587306975, "signal/advantage_pre_scale_abs_mean": 0.12827864587306975, "signal/advantage_pre_scale_std": 0.19161723256111146, "signal/advantage_std": 0.19161723256111146, "signal/brier_reward/centered_abs_mean": 0.20366644859313965, "signal/brier_reward/group_bin_occupancy": 0.90625, "signal/brier_reward/group_std_mean": 0.251072758436203, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020366644859313963, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020366644859313963, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04744342863559723, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7736111111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.07852019146084785, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004744342807680368, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004744342807680368, "signal/format_reward/centered_abs_mean": 0.03862304724752903, "signal/format_reward/group_bin_occupancy": 0.15868055555555557, "signal/format_reward/group_std_mean": 0.06846961379051208, "signal/format_reward/group_zero_std_frac": 0.7305555462837219, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.019311523623764516, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.019311523623764516, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018203360494226217, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6711805555555556, "signal/frontier_aurc_reward/group_std_mean": 0.00309151909314096, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.275420156365726e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.275420156365726e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.23083798289299012, "signal/frontier_coverage_0/group_bin_occupancy": 0.898611111111111, "signal/frontier_coverage_0/group_std_mean": 0.30217787623405457, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002885474869981408, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002885474869981408, "signal/frontier_coverage_1/centered_abs_mean": 0.23083798289299012, "signal/frontier_coverage_1/group_bin_occupancy": 0.898611111111111, "signal/frontier_coverage_1/group_std_mean": 0.30217787623405457, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002885474869981408, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002885474869981408, "signal/frontier_coverage_10/centered_abs_mean": 0.23083798289299012, "signal/frontier_coverage_10/group_bin_occupancy": 0.898611111111111, "signal/frontier_coverage_10/group_std_mean": 0.30217787623405457, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002885474869981408, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002885474869981408, "signal/frontier_coverage_15/centered_abs_mean": 0.23083798289299012, "signal/frontier_coverage_15/group_bin_occupancy": 0.898611111111111, "signal/frontier_coverage_15/group_std_mean": 0.30217787623405457, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002885474869981408, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002885474869981408, "signal/frontier_coverage_20/centered_abs_mean": 0.23083798289299012, "signal/frontier_coverage_20/group_bin_occupancy": 0.898611111111111, "signal/frontier_coverage_20/group_std_mean": 0.30217787623405457, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002885474869981408, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002885474869981408, "signal/frontier_coverage_25/centered_abs_mean": 0.23083798289299012, "signal/frontier_coverage_25/group_bin_occupancy": 0.898611111111111, "signal/frontier_coverage_25/group_std_mean": 0.30217787623405457, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002885474869981408, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002885474869981408, "signal/frontier_coverage_5/centered_abs_mean": 0.23083798289299012, "signal/frontier_coverage_5/group_bin_occupancy": 0.898611111111111, "signal/frontier_coverage_5/group_std_mean": 0.30217787623405457, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002885474869981408, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002885474869981408, "signal/frontier_ece_reward/centered_abs_mean": 0.05384446457028389, "signal/frontier_ece_reward/group_bin_occupancy": 0.8677083333333334, "signal/frontier_ece_reward/group_std_mean": 0.08003931492567062, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005384446494281292, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005384446494281292, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33434916734695436, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7777777777777778, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4062751352787018, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03343491479754448, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03343491479754448, "step": 60 }, { "calibration/aurc": 0.23656779693400476, "calibration/batch_distribution_entropy": 0.9777875846527511, "calibration/batch_entropy_100bins": 0.9566050467615526, "calibration/batch_entropy_10bins": 0.9777875846527511, "calibration/batch_entropy_50bins": 0.9694041980201531, "calibration/batch_uniqueness": 0.951238184119107, "calibration/buffer_distribution_entropy": 0.8348047983245668, "calibration/buffer_entropy_100bins": 0.7944202847632227, "calibration/buffer_entropy_10bins": 0.8348047983245668, "calibration/buffer_entropy_50bins": 0.8353967415859247, "calibration/confidence_entropy": 0.5115842716518644, "calibration/coverage@0%": 0.010110893193929047, "calibration/coverage@1%": 0.010110893193929047, "calibration/coverage@10%": 0.18960810918728915, "calibration/coverage@15%": 0.3931437969466637, "calibration/coverage@20%": 0.5138705409531458, "calibration/coverage@25%": 0.6643819070694328, "calibration/coverage@30%": 0.7248, "calibration/coverage@5%": 0.044939389236145405, "calibration/ece": 0.1552912149271485, "calibration/mean_confidence": 0.5501857656980181, "calibration/prompt_uniqueness": 0.8891747813176986, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022743055555555537, "completions/max_length": 3102.6, "completions/max_terminated_length": 3102.6, "completions/mean_length": 722.5470581054688, "completions/mean_terminated_length": 739.3680297851563, "completions/min_length": 0.0, "completions/min_terminated_length": 211.0, "epoch": 0.1559980500243747, "grad_norm": 0.00044656230602413416, "learning_rate": 4.307228915662651e-06, "loss": -0.0191, "num_tokens": 132532747.0, "reward": 0.9545995354652405, "reward_std": 0.16012048721313477, "rewards/accuracy_reward": 0.6457465291023254, "rewards/brier_reward": 0.7412284135818481, "rewards/confidence_uniqueness_reward": 0.9310332775115967, "rewards/format_reward": 0.9772569417953492, "rewards/frontier_aurc_reward": -0.0018141154432669282, "rewards/frontier_coverage_0": -0.018469791370444, "rewards/frontier_coverage_1": -0.018469791370444, "rewards/frontier_coverage_10": -0.018469791370444, "rewards/frontier_coverage_15": -0.018469791370444, "rewards/frontier_coverage_20": -0.018469791370444, "rewards/frontier_coverage_25": -0.018469791370444, "rewards/frontier_coverage_5": -0.018469791370444, "rewards/frontier_ece_reward": 0.006174688460305333, "rewards/frontier_entropy_batch_reward": -0.23107051253318786, "signal/accuracy_reward/centered_abs_mean": 0.17658962607383727, "signal/accuracy_reward/group_bin_occupancy": 0.21319444444444446, "signal/accuracy_reward/group_std_mean": 0.2395526260137558, "signal/accuracy_reward/group_zero_std_frac": 0.2944444417953491, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08829481303691863, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08829481303691863, "signal/advantage_abs_mean": 0.11682901531457901, "signal/advantage_pre_scale_abs_mean": 0.11682901531457901, "signal/advantage_pre_scale_std": 0.18256474137306214, "signal/advantage_std": 0.18256474137306214, "signal/brier_reward/centered_abs_mean": 0.19553665220737457, "signal/brier_reward/group_bin_occupancy": 0.8847222222222223, "signal/brier_reward/group_std_mean": 0.2446680635213852, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019553666189312934, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019553666189312934, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04568360410630703, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7583333333333333, "signal/confidence_uniqueness_reward/group_std_mean": 0.07737514227628708, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004568360652774573, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004568360652774573, "signal/format_reward/centered_abs_mean": 0.03725043348968029, "signal/format_reward/group_bin_occupancy": 0.1590277777777778, "signal/format_reward/group_std_mean": 0.06770127713680267, "signal/format_reward/group_zero_std_frac": 0.7277777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.018625216744840144, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.018625216744840144, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014472146751359106, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6895833333333333, "signal/frontier_aurc_reward/group_std_mean": 0.0024238450918346644, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8090184312313795e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8090184312313795e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.22579068541526795, "signal/frontier_coverage_0/group_bin_occupancy": 0.8784722222222221, "signal/frontier_coverage_0/group_std_mean": 0.2969242215156555, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0028223836328834296, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028223836328834296, "signal/frontier_coverage_1/centered_abs_mean": 0.22579068541526795, "signal/frontier_coverage_1/group_bin_occupancy": 0.8784722222222221, "signal/frontier_coverage_1/group_std_mean": 0.2969242215156555, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028223836328834296, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028223836328834296, "signal/frontier_coverage_10/centered_abs_mean": 0.22579068541526795, "signal/frontier_coverage_10/group_bin_occupancy": 0.8784722222222221, "signal/frontier_coverage_10/group_std_mean": 0.2969242215156555, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028223836328834296, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028223836328834296, "signal/frontier_coverage_15/centered_abs_mean": 0.22579068541526795, "signal/frontier_coverage_15/group_bin_occupancy": 0.8784722222222221, "signal/frontier_coverage_15/group_std_mean": 0.2969242215156555, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028223836328834296, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028223836328834296, "signal/frontier_coverage_20/centered_abs_mean": 0.22579068541526795, "signal/frontier_coverage_20/group_bin_occupancy": 0.8784722222222221, "signal/frontier_coverage_20/group_std_mean": 0.2969242215156555, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028223836328834296, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028223836328834296, "signal/frontier_coverage_25/centered_abs_mean": 0.22579068541526795, "signal/frontier_coverage_25/group_bin_occupancy": 0.8784722222222221, "signal/frontier_coverage_25/group_std_mean": 0.2969242215156555, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028223836328834296, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028223836328834296, "signal/frontier_coverage_5/centered_abs_mean": 0.22579068541526795, "signal/frontier_coverage_5/group_bin_occupancy": 0.8784722222222221, "signal/frontier_coverage_5/group_std_mean": 0.2969242215156555, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028223836328834296, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028223836328834296, "signal/frontier_ece_reward/centered_abs_mean": 0.050039660185575485, "signal/frontier_ece_reward/group_bin_occupancy": 0.8552083333333333, "signal/frontier_ece_reward/group_std_mean": 0.07259590029716492, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005003966204822063, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005003966204822063, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3180016040802002, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7805555555555556, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39476526975631715, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03180016092956066, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03180016092956066, "step": 65 }, { "calibration/aurc": 0.2948536564114072, "calibration/batch_distribution_entropy": 0.9733400726258928, "calibration/batch_entropy_100bins": 0.9536061745510522, "calibration/batch_entropy_10bins": 0.9733400726258928, "calibration/batch_entropy_50bins": 0.9674596609580579, "calibration/batch_uniqueness": 0.949705160752441, "calibration/buffer_distribution_entropy": 0.8540476081873998, "calibration/buffer_entropy_100bins": 0.8192057037753948, "calibration/buffer_entropy_10bins": 0.8540476081873998, "calibration/buffer_entropy_50bins": 0.8557242505965151, "calibration/confidence_entropy": 0.49067170223761797, "calibration/coverage@0%": 0.022465164274228008, "calibration/coverage@1%": 0.022465164274228008, "calibration/coverage@10%": 0.04398193654670511, "calibration/coverage@15%": 0.11319772520337099, "calibration/coverage@20%": 0.20620307161185872, "calibration/coverage@25%": 0.3230422460624132, "calibration/coverage@30%": 0.46851682623276936, "calibration/coverage@5%": 0.032168668317354696, "calibration/ece": 0.16806239032062145, "calibration/mean_confidence": 0.5722495755231523, "calibration/prompt_uniqueness": 0.8687714538093431, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 3525.8, "completions/max_terminated_length": 3525.8, "completions/mean_length": 725.2604125976562, "completions/mean_terminated_length": 742.6922485351563, "completions/min_length": 0.0, "completions/min_terminated_length": 192.6, "epoch": 0.16799790002624967, "grad_norm": 0.00043501320760697126, "learning_rate": 4.156626506024097e-06, "loss": -0.0206, "num_tokens": 143965891.0, "reward": 0.9423856019973755, "reward_std": 0.16414594948291777, "rewards/accuracy_reward": 0.6236111164093018, "rewards/brier_reward": 0.7365549683570862, "rewards/confidence_uniqueness_reward": 0.9279781222343445, "rewards/format_reward": 0.9764757037162781, "rewards/frontier_aurc_reward": -0.0020766297122463582, "rewards/frontier_coverage_0": 0.0018009988591074944, "rewards/frontier_coverage_1": 0.0018009988591074944, "rewards/frontier_coverage_10": 0.0018009988591074944, "rewards/frontier_coverage_15": 0.0018009988591074944, "rewards/frontier_coverage_20": 0.0018009988591074944, "rewards/frontier_coverage_25": 0.0018009988591074944, "rewards/frontier_coverage_5": 0.0018009988591074944, "rewards/frontier_ece_reward": 0.009368815366178751, "rewards/frontier_entropy_batch_reward": -0.25179632306098937, "signal/accuracy_reward/centered_abs_mean": 0.1868381083011627, "signal/accuracy_reward/group_bin_occupancy": 0.21284722222222224, "signal/accuracy_reward/group_std_mean": 0.24697498977184296, "signal/accuracy_reward/group_zero_std_frac": 0.2972222238779068, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09341905415058135, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09341905415058135, "signal/advantage_abs_mean": 0.12035283744335175, "signal/advantage_pre_scale_abs_mean": 0.12035283744335175, "signal/advantage_pre_scale_std": 0.1880349338054657, "signal/advantage_std": 0.1880349338054657, "signal/brier_reward/centered_abs_mean": 0.20086349546909332, "signal/brier_reward/group_bin_occupancy": 0.8625, "signal/brier_reward/group_std_mean": 0.2517173230648041, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020086349919438362, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020086349919438362, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05000351741909981, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7725694444444444, "signal/confidence_uniqueness_reward/group_std_mean": 0.08059784770011902, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005000351928174496, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005000351928174496, "signal/format_reward/centered_abs_mean": 0.03988172635436058, "signal/format_reward/group_bin_occupancy": 0.15659722222222222, "signal/format_reward/group_std_mean": 0.06865522116422654, "signal/format_reward/group_zero_std_frac": 0.7472222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01994086317718029, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01994086317718029, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018553413217887282, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6861111111111111, "signal/frontier_aurc_reward/group_std_mean": 0.0029855409171432256, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3191767104435713e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3191767104435713e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.22352704107761384, "signal/frontier_coverage_0/group_bin_occupancy": 0.8545138888888889, "signal/frontier_coverage_0/group_std_mean": 0.29782513380050657, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027940880972892046, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027940880972892046, "signal/frontier_coverage_1/centered_abs_mean": 0.22352704107761384, "signal/frontier_coverage_1/group_bin_occupancy": 0.8545138888888889, "signal/frontier_coverage_1/group_std_mean": 0.29782513380050657, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027940880972892046, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027940880972892046, "signal/frontier_coverage_10/centered_abs_mean": 0.22352704107761384, "signal/frontier_coverage_10/group_bin_occupancy": 0.8545138888888889, "signal/frontier_coverage_10/group_std_mean": 0.29782513380050657, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027940880972892046, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027940880972892046, "signal/frontier_coverage_15/centered_abs_mean": 0.22352704107761384, "signal/frontier_coverage_15/group_bin_occupancy": 0.8545138888888889, "signal/frontier_coverage_15/group_std_mean": 0.29782513380050657, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027940880972892046, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027940880972892046, "signal/frontier_coverage_20/centered_abs_mean": 0.22352704107761384, "signal/frontier_coverage_20/group_bin_occupancy": 0.8545138888888889, "signal/frontier_coverage_20/group_std_mean": 0.29782513380050657, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027940880972892046, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027940880972892046, "signal/frontier_coverage_25/centered_abs_mean": 0.22352704107761384, "signal/frontier_coverage_25/group_bin_occupancy": 0.8545138888888889, "signal/frontier_coverage_25/group_std_mean": 0.29782513380050657, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027940880972892046, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027940880972892046, "signal/frontier_coverage_5/centered_abs_mean": 0.22352704107761384, "signal/frontier_coverage_5/group_bin_occupancy": 0.8545138888888889, "signal/frontier_coverage_5/group_std_mean": 0.29782513380050657, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027940880972892046, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027940880972892046, "signal/frontier_ece_reward/centered_abs_mean": 0.04868664965033531, "signal/frontier_ece_reward/group_bin_occupancy": 0.8552083333333333, "signal/frontier_ece_reward/group_std_mean": 0.07001925408840179, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004868665337562561, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004868665337562561, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3257962942123413, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7805555555555557, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4016845703125, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03257962986826897, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03257962986826897, "step": 70 }, { "calibration/aurc": 0.21711738393930857, "calibration/batch_distribution_entropy": 0.9600224396836108, "calibration/batch_entropy_100bins": 0.9466081240419243, "calibration/batch_entropy_10bins": 0.9600224396836108, "calibration/batch_entropy_50bins": 0.9598622254467092, "calibration/batch_uniqueness": 0.9482612156029203, "calibration/buffer_distribution_entropy": 0.868126624309034, "calibration/buffer_entropy_100bins": 0.8390208000202177, "calibration/buffer_entropy_10bins": 0.868126624309034, "calibration/buffer_entropy_50bins": 0.8714659369493593, "calibration/confidence_entropy": 0.5153322298773169, "calibration/coverage@0%": 0.042552211392207835, "calibration/coverage@1%": 0.042552211392207835, "calibration/coverage@10%": 0.16360546316683083, "calibration/coverage@15%": 0.39607171121408147, "calibration/coverage@20%": 0.5625821359146268, "calibration/coverage@25%": 0.6644978024470013, "calibration/coverage@30%": 0.7292566187847698, "calibration/coverage@5%": 0.05221326791108637, "calibration/ece": 0.1500837966971388, "calibration/mean_confidence": 0.5832129899789663, "calibration/prompt_uniqueness": 0.8774171238232608, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01753472222222221, "completions/max_length": 3559.8, "completions/max_terminated_length": 3559.8, "completions/mean_length": 735.5065185546875, "completions/mean_terminated_length": 748.6990966796875, "completions/min_length": 0.0, "completions/min_terminated_length": 243.4, "epoch": 0.17999775002812465, "grad_norm": 0.0004371130489744246, "learning_rate": 4.006024096385543e-06, "loss": -0.0143, "num_tokens": 155503822.0, "reward": 0.9730815052986145, "reward_std": 0.1551417291164398, "rewards/accuracy_reward": 0.6793402791023254, "rewards/brier_reward": 0.7635630369186401, "rewards/confidence_uniqueness_reward": 0.9331399917602539, "rewards/format_reward": 0.9823784708976746, "rewards/frontier_aurc_reward": -0.0015793633414432407, "rewards/frontier_coverage_0": -0.01739480420947075, "rewards/frontier_coverage_1": -0.01739480420947075, "rewards/frontier_coverage_10": -0.01739480420947075, "rewards/frontier_coverage_15": -0.01739480420947075, "rewards/frontier_coverage_20": -0.01739480420947075, "rewards/frontier_coverage_25": -0.01739480420947075, "rewards/frontier_coverage_5": -0.01739480420947075, "rewards/frontier_ece_reward": 0.008008561190217733, "rewards/frontier_entropy_batch_reward": -0.267072793841362, "signal/accuracy_reward/centered_abs_mean": 0.18628471791744233, "signal/accuracy_reward/group_bin_occupancy": 0.21180555555555558, "signal/accuracy_reward/group_std_mean": 0.24508444964885712, "signal/accuracy_reward/group_zero_std_frac": 0.30555555522441863, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09314235895872117, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09314235895872117, "signal/advantage_abs_mean": 0.11387252360582352, "signal/advantage_pre_scale_abs_mean": 0.11387252360582352, "signal/advantage_pre_scale_std": 0.1787475287914276, "signal/advantage_std": 0.1787475287914276, "signal/brier_reward/centered_abs_mean": 0.18025039732456208, "signal/brier_reward/group_bin_occupancy": 0.8708333333333332, "signal/brier_reward/group_std_mean": 0.22753545939922332, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01802504062652588, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01802504062652588, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04140819758176804, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7996527777777779, "signal/confidence_uniqueness_reward/group_std_mean": 0.06981581598520278, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004140820214524865, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004140820214524865, "signal/format_reward/centered_abs_mean": 0.03043077252805233, "signal/format_reward/group_bin_occupancy": 0.15416666666666667, "signal/format_reward/group_std_mean": 0.05725453943014145, "signal/format_reward/group_zero_std_frac": 0.7666666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.015215386264026165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.015215386264026165, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014222318306565285, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7027777777777777, "signal/frontier_aurc_reward/group_std_mean": 0.002294929837808013, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7777897664927877e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7777897664927877e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.21255133748054506, "signal/frontier_coverage_0/group_bin_occupancy": 0.8583333333333332, "signal/frontier_coverage_0/group_std_mean": 0.2832107603549957, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002656891755759716, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002656891755759716, "signal/frontier_coverage_1/centered_abs_mean": 0.21255133748054506, "signal/frontier_coverage_1/group_bin_occupancy": 0.8583333333333332, "signal/frontier_coverage_1/group_std_mean": 0.2832107603549957, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002656891755759716, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002656891755759716, "signal/frontier_coverage_10/centered_abs_mean": 0.21255133748054506, "signal/frontier_coverage_10/group_bin_occupancy": 0.8583333333333332, "signal/frontier_coverage_10/group_std_mean": 0.2832107603549957, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002656891755759716, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002656891755759716, "signal/frontier_coverage_15/centered_abs_mean": 0.21255133748054506, "signal/frontier_coverage_15/group_bin_occupancy": 0.8583333333333332, "signal/frontier_coverage_15/group_std_mean": 0.2832107603549957, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002656891755759716, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002656891755759716, "signal/frontier_coverage_20/centered_abs_mean": 0.21255133748054506, "signal/frontier_coverage_20/group_bin_occupancy": 0.8583333333333332, "signal/frontier_coverage_20/group_std_mean": 0.2832107603549957, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002656891755759716, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002656891755759716, "signal/frontier_coverage_25/centered_abs_mean": 0.21255133748054506, "signal/frontier_coverage_25/group_bin_occupancy": 0.8583333333333332, "signal/frontier_coverage_25/group_std_mean": 0.2832107603549957, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002656891755759716, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002656891755759716, "signal/frontier_coverage_5/centered_abs_mean": 0.21255133748054506, "signal/frontier_coverage_5/group_bin_occupancy": 0.8583333333333332, "signal/frontier_coverage_5/group_std_mean": 0.2832107603549957, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002656891755759716, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002656891755759716, "signal/frontier_ece_reward/centered_abs_mean": 0.042948073148727416, "signal/frontier_ece_reward/group_bin_occupancy": 0.8493055555555555, "signal/frontier_ece_reward/group_std_mean": 0.06208599209785461, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004294807370752096, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004294807370752096, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3377181708812714, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.78125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4080219030380249, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033771815896034243, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033771815896034243, "step": 75 }, { "calibration/aurc": 0.19194942395941994, "calibration/batch_distribution_entropy": 0.9505285940513144, "calibration/batch_entropy_100bins": 0.9470248836010062, "calibration/batch_entropy_10bins": 0.9505285940513144, "calibration/batch_entropy_50bins": 0.9568973139463551, "calibration/batch_uniqueness": 0.9461827633448667, "calibration/buffer_distribution_entropy": 0.8791427141187192, "calibration/buffer_entropy_100bins": 0.8552186845109102, "calibration/buffer_entropy_10bins": 0.8791427141187192, "calibration/buffer_entropy_50bins": 0.884118126026354, "calibration/confidence_entropy": 0.4923999060736496, "calibration/coverage@0%": 0.026547209660839854, "calibration/coverage@1%": 0.026547209660839854, "calibration/coverage@10%": 0.2988454440806766, "calibration/coverage@15%": 0.41392928557371744, "calibration/coverage@20%": 0.5923071216089244, "calibration/coverage@25%": 0.7086820744187616, "calibration/coverage@30%": 0.8250354274917335, "calibration/coverage@5%": 0.10274072601183652, "calibration/ece": 0.18467594070419555, "calibration/mean_confidence": 0.616226704959489, "calibration/prompt_uniqueness": 0.8645967746439295, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021788194444444443, "completions/max_length": 3473.2, "completions/max_terminated_length": 3473.2, "completions/mean_length": 765.4503540039062, "completions/mean_terminated_length": 782.624853515625, "completions/min_length": 0.0, "completions/min_terminated_length": 227.0, "epoch": 0.19199760002999963, "grad_norm": 0.00039678241591900587, "learning_rate": 3.855421686746989e-06, "loss": -0.0185, "num_tokens": 167375090.0, "reward": 0.9544713973999024, "reward_std": 0.15620069205760956, "rewards/accuracy_reward": 0.6460069417953491, "rewards/brier_reward": 0.7477240562438965, "rewards/confidence_uniqueness_reward": 0.9298016190528869, "rewards/format_reward": 0.978124988079071, "rewards/frontier_aurc_reward": -0.0018646372482180595, "rewards/frontier_coverage_0": -0.009632312413305043, "rewards/frontier_coverage_1": -0.009632312413305043, "rewards/frontier_coverage_10": -0.009632312413305043, "rewards/frontier_coverage_15": -0.009632312413305043, "rewards/frontier_coverage_20": -0.009632312413305043, "rewards/frontier_coverage_25": -0.009632312413305043, "rewards/frontier_coverage_5": -0.009632312413305043, "rewards/frontier_ece_reward": 0.008436152525246144, "rewards/frontier_entropy_batch_reward": -0.25324631929397584, "signal/accuracy_reward/centered_abs_mean": 0.18125, "signal/accuracy_reward/group_bin_occupancy": 0.21354166666666666, "signal/accuracy_reward/group_std_mean": 0.24379155337810515, "signal/accuracy_reward/group_zero_std_frac": 0.29166666865348817, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.090625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.090625, "signal/advantage_abs_mean": 0.11512753665447235, "signal/advantage_pre_scale_abs_mean": 0.11512753665447235, "signal/advantage_pre_scale_std": 0.17972079813480377, "signal/advantage_std": 0.17972079813480377, "signal/brier_reward/centered_abs_mean": 0.18024792075157164, "signal/brier_reward/group_bin_occupancy": 0.8690972222222222, "signal/brier_reward/group_std_mean": 0.22731645703315734, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018024792522192003, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018024792522192003, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04387593828141689, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.804861111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.06978548243641854, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004387593921273946, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004387593921273946, "signal/format_reward/centered_abs_mean": 0.03373480923473835, "signal/format_reward/group_bin_occupancy": 0.15243055555555557, "signal/format_reward/group_std_mean": 0.057917628437280655, "signal/format_reward/group_zero_std_frac": 0.7805555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.016867404617369176, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.016867404617369176, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017565070651471616, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6854166666666667, "signal/frontier_aurc_reward/group_std_mean": 0.0028648764360696076, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1956339696771466e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1956339696771466e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20747113823890687, "signal/frontier_coverage_0/group_bin_occupancy": 0.85, "signal/frontier_coverage_0/group_std_mean": 0.27563032507896423, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025933892466127872, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025933892466127872, "signal/frontier_coverage_1/centered_abs_mean": 0.20747113823890687, "signal/frontier_coverage_1/group_bin_occupancy": 0.85, "signal/frontier_coverage_1/group_std_mean": 0.27563032507896423, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025933892466127872, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025933892466127872, "signal/frontier_coverage_10/centered_abs_mean": 0.20747113823890687, "signal/frontier_coverage_10/group_bin_occupancy": 0.85, "signal/frontier_coverage_10/group_std_mean": 0.27563032507896423, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025933892466127872, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025933892466127872, "signal/frontier_coverage_15/centered_abs_mean": 0.20747113823890687, "signal/frontier_coverage_15/group_bin_occupancy": 0.85, "signal/frontier_coverage_15/group_std_mean": 0.27563032507896423, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025933892466127872, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025933892466127872, "signal/frontier_coverage_20/centered_abs_mean": 0.20747113823890687, "signal/frontier_coverage_20/group_bin_occupancy": 0.85, "signal/frontier_coverage_20/group_std_mean": 0.27563032507896423, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025933892466127872, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025933892466127872, "signal/frontier_coverage_25/centered_abs_mean": 0.20747113823890687, "signal/frontier_coverage_25/group_bin_occupancy": 0.85, "signal/frontier_coverage_25/group_std_mean": 0.27563032507896423, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025933892466127872, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025933892466127872, "signal/frontier_coverage_5/centered_abs_mean": 0.20747113823890687, "signal/frontier_coverage_5/group_bin_occupancy": 0.85, "signal/frontier_coverage_5/group_std_mean": 0.27563032507896423, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025933892466127872, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025933892466127872, "signal/frontier_ece_reward/centered_abs_mean": 0.04286099076271057, "signal/frontier_ece_reward/group_bin_occupancy": 0.8486111111111111, "signal/frontier_ece_reward/group_std_mean": 0.06143382340669632, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004286099225282669, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004286099225282669, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32150877714157106, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.763888888888889, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39428759813308717, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03215087540447712, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03215087540447712, "step": 80 }, { "calibration/aurc": 0.20464321287276627, "calibration/batch_distribution_entropy": 0.9734846034862557, "calibration/batch_entropy_100bins": 0.9582855024671899, "calibration/batch_entropy_10bins": 0.9734846034862557, "calibration/batch_entropy_50bins": 0.9687551046359546, "calibration/batch_uniqueness": 0.9506750992026983, "calibration/buffer_distribution_entropy": 0.8894459259708161, "calibration/buffer_entropy_100bins": 0.8693461376608582, "calibration/buffer_entropy_10bins": 0.8894459259708161, "calibration/buffer_entropy_50bins": 0.8952655505976255, "calibration/confidence_entropy": 0.49973062168606786, "calibration/coverage@0%": 0.017135361002953733, "calibration/coverage@1%": 0.017135361002953733, "calibration/coverage@10%": 0.19827927770527864, "calibration/coverage@15%": 0.34445682881222645, "calibration/coverage@20%": 0.4741548295452036, "calibration/coverage@25%": 0.7317244536788657, "calibration/coverage@30%": 0.83289196155607, "calibration/coverage@5%": 0.07288281577479874, "calibration/ece": 0.1435432468710961, "calibration/mean_confidence": 0.530408786754918, "calibration/prompt_uniqueness": 0.8742536493030706, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01605902777777779, "completions/max_length": 3906.4, "completions/max_terminated_length": 3906.4, "completions/mean_length": 765.7278686523438, "completions/mean_terminated_length": 778.280224609375, "completions/min_length": 0.0, "completions/min_terminated_length": 265.2, "epoch": 0.2039974500318746, "grad_norm": 0.00038754488923586905, "learning_rate": 3.7048192771084342e-06, "loss": -0.0137, "num_tokens": 179283475.0, "reward": 0.9706011414527893, "reward_std": 0.15045669674873352, "rewards/accuracy_reward": 0.6691840291023254, "rewards/brier_reward": 0.7690122842788696, "rewards/confidence_uniqueness_reward": 0.9347800016403198, "rewards/format_reward": 0.9837673544883728, "rewards/frontier_aurc_reward": -0.001625478290952742, "rewards/frontier_coverage_0": -0.0016857189650181681, "rewards/frontier_coverage_1": -0.0016857189650181681, "rewards/frontier_coverage_10": -0.0016857189650181681, "rewards/frontier_coverage_15": -0.0016857189650181681, "rewards/frontier_coverage_20": -0.0016857189650181681, "rewards/frontier_coverage_25": -0.0016857189650181681, "rewards/frontier_coverage_5": -0.0016857189650181681, "rewards/frontier_ece_reward": 0.010668071359395981, "rewards/frontier_entropy_batch_reward": -0.2715276062488556, "signal/accuracy_reward/centered_abs_mean": 0.18636610209941865, "signal/accuracy_reward/group_bin_occupancy": 0.21215277777777777, "signal/accuracy_reward/group_std_mean": 0.2449056774377823, "signal/accuracy_reward/group_zero_std_frac": 0.30277777910232545, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09318305104970932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09318305104970932, "signal/advantage_abs_mean": 0.11073940545320511, "signal/advantage_pre_scale_abs_mean": 0.11073940545320511, "signal/advantage_pre_scale_std": 0.17361874580383302, "signal/advantage_std": 0.17361874580383302, "signal/brier_reward/centered_abs_mean": 0.1742205113172531, "signal/brier_reward/group_bin_occupancy": 0.8534722222222222, "signal/brier_reward/group_std_mean": 0.22123693227767943, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0174220509827137, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0174220509827137, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03802314177155495, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8024305555555555, "signal/confidence_uniqueness_reward/group_std_mean": 0.06435777395963668, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038023141212761404, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038023141212761404, "signal/format_reward/centered_abs_mean": 0.02753363735973835, "signal/format_reward/group_bin_occupancy": 0.15208333333333335, "signal/format_reward/group_std_mean": 0.052114753425121306, "signal/format_reward/group_zero_std_frac": 0.7833333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013766818679869175, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013766818679869175, "signal/frontier_aurc_reward/centered_abs_mean": 0.001538053946569562, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6909722222222222, "signal/frontier_aurc_reward/group_std_mean": 0.0024671837454661727, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9225675350753592e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9225675350753592e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.21704848110675812, "signal/frontier_coverage_0/group_bin_occupancy": 0.846875, "signal/frontier_coverage_0/group_std_mean": 0.28512428402900697, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002713105920702219, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002713105920702219, "signal/frontier_coverage_1/centered_abs_mean": 0.21704848110675812, "signal/frontier_coverage_1/group_bin_occupancy": 0.846875, "signal/frontier_coverage_1/group_std_mean": 0.28512428402900697, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002713105920702219, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002713105920702219, "signal/frontier_coverage_10/centered_abs_mean": 0.21704848110675812, "signal/frontier_coverage_10/group_bin_occupancy": 0.846875, "signal/frontier_coverage_10/group_std_mean": 0.28512428402900697, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002713105920702219, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002713105920702219, "signal/frontier_coverage_15/centered_abs_mean": 0.21704848110675812, "signal/frontier_coverage_15/group_bin_occupancy": 0.846875, "signal/frontier_coverage_15/group_std_mean": 0.28512428402900697, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002713105920702219, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002713105920702219, "signal/frontier_coverage_20/centered_abs_mean": 0.21704848110675812, "signal/frontier_coverage_20/group_bin_occupancy": 0.846875, "signal/frontier_coverage_20/group_std_mean": 0.28512428402900697, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002713105920702219, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002713105920702219, "signal/frontier_coverage_25/centered_abs_mean": 0.21704848110675812, "signal/frontier_coverage_25/group_bin_occupancy": 0.846875, "signal/frontier_coverage_25/group_std_mean": 0.28512428402900697, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002713105920702219, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002713105920702219, "signal/frontier_coverage_5/centered_abs_mean": 0.21704848110675812, "signal/frontier_coverage_5/group_bin_occupancy": 0.846875, "signal/frontier_coverage_5/group_std_mean": 0.28512428402900697, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002713105920702219, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002713105920702219, "signal/frontier_ece_reward/centered_abs_mean": 0.042044655233621595, "signal/frontier_ece_reward/group_bin_occupancy": 0.8402777777777777, "signal/frontier_ece_reward/group_std_mean": 0.05982731878757477, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004204465728253126, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004204465728253126, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3326686263084412, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7815972222222222, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40350683927536013, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033266863971948626, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033266863971948626, "step": 85 }, { "calibration/aurc": 0.16738103314210595, "calibration/batch_distribution_entropy": 0.9697290198955569, "calibration/batch_entropy_100bins": 0.957681854552531, "calibration/batch_entropy_10bins": 0.9697290198955569, "calibration/batch_entropy_50bins": 0.9673187504894678, "calibration/batch_uniqueness": 0.9503381676350635, "calibration/buffer_distribution_entropy": 0.8989638723450991, "calibration/buffer_entropy_100bins": 0.8812589203185311, "calibration/buffer_entropy_10bins": 0.8989638723450991, "calibration/buffer_entropy_50bins": 0.9050185855286758, "calibration/confidence_entropy": 0.5001923409519085, "calibration/coverage@0%": 0.012599211874791133, "calibration/coverage@1%": 0.012599211874791133, "calibration/coverage@10%": 0.3671920254539378, "calibration/coverage@15%": 0.5097086071813152, "calibration/coverage@20%": 0.6111030851992127, "calibration/coverage@25%": 0.8049149330696966, "calibration/coverage@30%": 0.9180851063829788, "calibration/coverage@5%": 0.08726653299171958, "calibration/ece": 0.16554122268343638, "calibration/mean_confidence": 0.577438955676482, "calibration/prompt_uniqueness": 0.871450176841621, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018402777777777813, "completions/max_length": 3625.4, "completions/max_terminated_length": 3625.4, "completions/mean_length": 732.7577270507812, "completions/mean_terminated_length": 746.5180297851563, "completions/min_length": 0.0, "completions/min_terminated_length": 248.0, "epoch": 0.2159973000337496, "grad_norm": 0.0003915593842975795, "learning_rate": 3.5542168674698798e-06, "loss": -0.0155, "num_tokens": 190793516.0, "reward": 0.9725017189979553, "reward_std": 0.150723797082901, "rewards/accuracy_reward": 0.6779513835906983, "rewards/brier_reward": 0.7679128766059875, "rewards/confidence_uniqueness_reward": 0.9321897506713868, "rewards/format_reward": 0.9814236164093018, "rewards/frontier_aurc_reward": -0.0015417236601933837, "rewards/frontier_coverage_0": -0.012057388108223677, "rewards/frontier_coverage_1": -0.012057388108223677, "rewards/frontier_coverage_10": -0.012057388108223677, "rewards/frontier_coverage_15": -0.012057388108223677, "rewards/frontier_coverage_20": -0.012057388108223677, "rewards/frontier_coverage_25": -0.012057388108223677, "rewards/frontier_coverage_5": -0.012057388108223677, "rewards/frontier_ece_reward": 0.009636924415826798, "rewards/frontier_entropy_batch_reward": -0.270854526758194, "signal/accuracy_reward/centered_abs_mean": 0.17960069477558135, "signal/accuracy_reward/group_bin_occupancy": 0.20625, "signal/accuracy_reward/group_std_mean": 0.2330833613872528, "signal/accuracy_reward/group_zero_std_frac": 0.35, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08980034738779068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08980034738779068, "signal/advantage_abs_mean": 0.11121289134025573, "signal/advantage_pre_scale_abs_mean": 0.11121289134025573, "signal/advantage_pre_scale_std": 0.1761443316936493, "signal/advantage_std": 0.1761443316936493, "signal/brier_reward/centered_abs_mean": 0.17101071774959564, "signal/brier_reward/group_bin_occupancy": 0.851388888888889, "signal/brier_reward/group_std_mean": 0.21667629480361938, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017101072520017625, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017101072520017625, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04075642824172974, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7923611111111112, "signal/confidence_uniqueness_reward/group_std_mean": 0.0677463486790657, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0040756430942565204, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0040756430942565204, "signal/format_reward/centered_abs_mean": 0.0303819440305233, "signal/format_reward/group_bin_occupancy": 0.15277777777777776, "signal/format_reward/group_std_mean": 0.055435144901275636, "signal/format_reward/group_zero_std_frac": 0.7777777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01519097201526165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01519097201526165, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016192136332392692, "signal/frontier_aurc_reward/group_bin_occupancy": 0.678125, "signal/frontier_aurc_reward/group_std_mean": 0.002686009602621198, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.02401693968568e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.02401693968568e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2019166976213455, "signal/frontier_coverage_0/group_bin_occupancy": 0.845486111111111, "signal/frontier_coverage_0/group_std_mean": 0.26531084775924685, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025239587295800446, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025239587295800446, "signal/frontier_coverage_1/centered_abs_mean": 0.2019166976213455, "signal/frontier_coverage_1/group_bin_occupancy": 0.845486111111111, "signal/frontier_coverage_1/group_std_mean": 0.26531084775924685, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025239587295800446, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025239587295800446, "signal/frontier_coverage_10/centered_abs_mean": 0.2019166976213455, "signal/frontier_coverage_10/group_bin_occupancy": 0.845486111111111, "signal/frontier_coverage_10/group_std_mean": 0.26531084775924685, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025239587295800446, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025239587295800446, "signal/frontier_coverage_15/centered_abs_mean": 0.2019166976213455, "signal/frontier_coverage_15/group_bin_occupancy": 0.845486111111111, "signal/frontier_coverage_15/group_std_mean": 0.26531084775924685, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025239587295800446, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025239587295800446, "signal/frontier_coverage_20/centered_abs_mean": 0.2019166976213455, "signal/frontier_coverage_20/group_bin_occupancy": 0.845486111111111, "signal/frontier_coverage_20/group_std_mean": 0.26531084775924685, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025239587295800446, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025239587295800446, "signal/frontier_coverage_25/centered_abs_mean": 0.2019166976213455, "signal/frontier_coverage_25/group_bin_occupancy": 0.845486111111111, "signal/frontier_coverage_25/group_std_mean": 0.26531084775924685, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025239587295800446, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025239587295800446, "signal/frontier_coverage_5/centered_abs_mean": 0.2019166976213455, "signal/frontier_coverage_5/group_bin_occupancy": 0.845486111111111, "signal/frontier_coverage_5/group_std_mean": 0.26531084775924685, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025239587295800446, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025239587295800446, "signal/frontier_ece_reward/centered_abs_mean": 0.03998432978987694, "signal/frontier_ece_reward/group_bin_occupancy": 0.825, "signal/frontier_ece_reward/group_std_mean": 0.056585590541362765, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0039984329603612425, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0039984329603612425, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32754635214805605, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7697916666666667, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3992260992527008, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03275463432073593, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03275463432073593, "step": 90 }, { "calibration/aurc": 0.2147605719417338, "calibration/batch_distribution_entropy": 0.967722082431089, "calibration/batch_entropy_100bins": 0.9534754754805925, "calibration/batch_entropy_10bins": 0.967722082431089, "calibration/batch_entropy_50bins": 0.9641603425298937, "calibration/batch_uniqueness": 0.9504319540791502, "calibration/buffer_distribution_entropy": 0.906951265687233, "calibration/buffer_entropy_100bins": 0.8916355598204531, "calibration/buffer_entropy_10bins": 0.906951265687233, "calibration/buffer_entropy_50bins": 0.9133523836950388, "calibration/confidence_entropy": 0.5045424732246998, "calibration/coverage@0%": 0.02540387226948959, "calibration/coverage@1%": 0.02540387226948959, "calibration/coverage@10%": 0.16098412853940106, "calibration/coverage@15%": 0.5059969701039667, "calibration/coverage@20%": 0.5992519983512614, "calibration/coverage@25%": 0.6664830665649518, "calibration/coverage@30%": 0.7247192628504611, "calibration/coverage@5%": 0.03228217914779647, "calibration/ece": 0.16749806640188375, "calibration/mean_confidence": 0.5591985724521795, "calibration/prompt_uniqueness": 0.8673264458396595, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 2774.4, "completions/max_terminated_length": 2774.4, "completions/mean_length": 741.1726684570312, "completions/mean_terminated_length": 750.5460327148437, "completions/min_length": 0.0, "completions/min_terminated_length": 251.0, "epoch": 0.22799715003562457, "grad_norm": 0.0003392553189769387, "learning_rate": 3.4036144578313257e-06, "loss": -0.0104, "num_tokens": 202423505.0, "reward": 0.9711825489997864, "reward_std": 0.1370842456817627, "rewards/accuracy_reward": 0.6624131798744202, "rewards/brier_reward": 0.7755590438842773, "rewards/confidence_uniqueness_reward": 0.938760507106781, "rewards/format_reward": 0.9873263955116272, "rewards/frontier_aurc_reward": -0.0015440285205841064, "rewards/frontier_coverage_0": 0.004539217054843903, "rewards/frontier_coverage_1": 0.004539217054843903, "rewards/frontier_coverage_10": 0.004539217054843903, "rewards/frontier_coverage_15": 0.004539217054843903, "rewards/frontier_coverage_20": 0.004539217054843903, "rewards/frontier_coverage_25": 0.004539217054843903, "rewards/frontier_coverage_5": 0.004539217054843903, "rewards/frontier_ece_reward": 0.009655746817588805, "rewards/frontier_entropy_batch_reward": -0.26462686955928805, "signal/accuracy_reward/centered_abs_mean": 0.15812174379825591, "signal/accuracy_reward/group_bin_occupancy": 0.20381944444444441, "signal/accuracy_reward/group_std_mean": 0.21500875651836396, "signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07906087189912796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07906087189912796, "signal/advantage_abs_mean": 0.09966149926185608, "signal/advantage_pre_scale_abs_mean": 0.09966149926185608, "signal/advantage_pre_scale_std": 0.15955499708652496, "signal/advantage_std": 0.15955499708652496, "signal/brier_reward/centered_abs_mean": 0.16119154393672944, "signal/brier_reward/group_bin_occupancy": 0.8548611111111111, "signal/brier_reward/group_std_mean": 0.20481694340705872, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016119154170155525, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016119154170155525, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03265211023390293, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8354166666666668, "signal/confidence_uniqueness_reward/group_std_mean": 0.054433510452508924, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003265211218968034, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003265211218968034, "signal/format_reward/centered_abs_mean": 0.02172309048473835, "signal/format_reward/group_bin_occupancy": 0.14618055555555554, "signal/format_reward/group_std_mean": 0.041437828540802, "signal/format_reward/group_zero_std_frac": 0.8305555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010861545242369175, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010861545242369175, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014647976960986853, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6805555555555556, "signal/frontier_aurc_reward/group_std_mean": 0.0023766457568854095, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.83099717105506e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.83099717105506e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19489201307296752, "signal/frontier_coverage_0/group_bin_occupancy": 0.8607638888888889, "signal/frontier_coverage_0/group_std_mean": 0.25580963492393494, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024361501913517714, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024361501913517714, "signal/frontier_coverage_1/centered_abs_mean": 0.19489201307296752, "signal/frontier_coverage_1/group_bin_occupancy": 0.8607638888888889, "signal/frontier_coverage_1/group_std_mean": 0.25580963492393494, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024361501913517714, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024361501913517714, "signal/frontier_coverage_10/centered_abs_mean": 0.19489201307296752, "signal/frontier_coverage_10/group_bin_occupancy": 0.8607638888888889, "signal/frontier_coverage_10/group_std_mean": 0.25580963492393494, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024361501913517714, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024361501913517714, "signal/frontier_coverage_15/centered_abs_mean": 0.19489201307296752, "signal/frontier_coverage_15/group_bin_occupancy": 0.8607638888888889, "signal/frontier_coverage_15/group_std_mean": 0.25580963492393494, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024361501913517714, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024361501913517714, "signal/frontier_coverage_20/centered_abs_mean": 0.19489201307296752, "signal/frontier_coverage_20/group_bin_occupancy": 0.8607638888888889, "signal/frontier_coverage_20/group_std_mean": 0.25580963492393494, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024361501913517714, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024361501913517714, "signal/frontier_coverage_25/centered_abs_mean": 0.19489201307296752, "signal/frontier_coverage_25/group_bin_occupancy": 0.8607638888888889, "signal/frontier_coverage_25/group_std_mean": 0.25580963492393494, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024361501913517714, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024361501913517714, "signal/frontier_coverage_5/centered_abs_mean": 0.19489201307296752, "signal/frontier_coverage_5/group_bin_occupancy": 0.8607638888888889, "signal/frontier_coverage_5/group_std_mean": 0.25580963492393494, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024361501913517714, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024361501913517714, "signal/frontier_ece_reward/centered_abs_mean": 0.03678325191140175, "signal/frontier_ece_reward/group_bin_occupancy": 0.8333333333333333, "signal/frontier_ece_reward/group_std_mean": 0.05242298766970634, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036783250980079174, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036783250980079174, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3345192611217499, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7840277777777777, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40661893486976625, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03345192670822143, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03345192670822143, "step": 95 }, { "calibration/aurc": 0.1708679078638365, "calibration/batch_distribution_entropy": 0.9795370467281626, "calibration/batch_entropy_100bins": 0.9590961086475867, "calibration/batch_entropy_10bins": 0.9795370467281626, "calibration/batch_entropy_50bins": 0.9723127936886226, "calibration/batch_uniqueness": 0.951951389118833, "calibration/buffer_distribution_entropy": 0.9131616196726149, "calibration/buffer_entropy_100bins": 0.9000533082549502, "calibration/buffer_entropy_10bins": 0.9131616196726149, "calibration/buffer_entropy_50bins": 0.9200639230855394, "calibration/confidence_entropy": 0.5023486182458435, "calibration/coverage@0%": 0.006895599473400167, "calibration/coverage@1%": 0.006895599473400167, "calibration/coverage@10%": 0.22638525328449907, "calibration/coverage@15%": 0.5124370909762469, "calibration/coverage@20%": 0.7455989620373901, "calibration/coverage@25%": 0.8765314713019363, "calibration/coverage@30%": 0.9390113971602606, "calibration/coverage@5%": 0.01972982407233065, "calibration/ece": 0.16478280317365548, "calibration/mean_confidence": 0.5564289649632773, "calibration/prompt_uniqueness": 0.8679693861343288, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016579861111111115, "completions/max_length": 3928.4, "completions/max_terminated_length": 3928.4, "completions/mean_length": 758.4973876953125, "completions/mean_terminated_length": 771.2429443359375, "completions/min_length": 0.0, "completions/min_terminated_length": 239.0, "epoch": 0.23999700003749952, "grad_norm": 0.00033934120438061655, "learning_rate": 3.2530120481927713e-06, "loss": -0.0122, "num_tokens": 214260467.0, "reward": 0.9764691114425659, "reward_std": 0.14216096699237823, "rewards/accuracy_reward": 0.6714409708976745, "rewards/brier_reward": 0.7798413872718811, "rewards/confidence_uniqueness_reward": 0.9353215217590332, "rewards/format_reward": 0.9833333253860473, "rewards/frontier_aurc_reward": -0.0014284017262980342, "rewards/frontier_coverage_0": 0.012153985630720853, "rewards/frontier_coverage_1": 0.012153985630720853, "rewards/frontier_coverage_10": 0.012153985630720853, "rewards/frontier_coverage_15": 0.012153985630720853, "rewards/frontier_coverage_20": 0.012153985630720853, "rewards/frontier_coverage_25": 0.012153985630720853, "rewards/frontier_coverage_5": 0.012153985630720853, "rewards/frontier_ece_reward": 0.012064610421657563, "rewards/frontier_entropy_batch_reward": -0.24686425030231476, "signal/accuracy_reward/centered_abs_mean": 0.17261826992034912, "signal/accuracy_reward/group_bin_occupancy": 0.20659722222222224, "signal/accuracy_reward/group_std_mean": 0.22805944979190826, "signal/accuracy_reward/group_zero_std_frac": 0.34722222089767457, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08630913496017456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08630913496017456, "signal/advantage_abs_mean": 0.10423466861248017, "signal/advantage_pre_scale_abs_mean": 0.10423466861248017, "signal/advantage_pre_scale_std": 0.1661255478858948, "signal/advantage_std": 0.1661255478858948, "signal/brier_reward/centered_abs_mean": 0.16386253237724305, "signal/brier_reward/group_bin_occupancy": 0.853125, "signal/brier_reward/group_std_mean": 0.20879840552806855, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016386253573000432, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016386253573000432, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03649565950036049, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8076388888888889, "signal/confidence_uniqueness_reward/group_std_mean": 0.06122687980532646, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00364956590346992, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00364956590346992, "signal/format_reward/centered_abs_mean": 0.02623697929084301, "signal/format_reward/group_bin_occupancy": 0.15, "signal/format_reward/group_std_mean": 0.049075322598218916, "signal/format_reward/group_zero_std_frac": 0.800000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013118489645421505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013118489645421505, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015159687958657742, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6760416666666667, "signal/frontier_aurc_reward/group_std_mean": 0.002456930186599493, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8949608784168958e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8949608784168958e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20491735637187958, "signal/frontier_coverage_0/group_bin_occupancy": 0.8486111111111111, "signal/frontier_coverage_0/group_std_mean": 0.2684360921382904, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002561466954648495, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002561466954648495, "signal/frontier_coverage_1/centered_abs_mean": 0.20491735637187958, "signal/frontier_coverage_1/group_bin_occupancy": 0.8486111111111111, "signal/frontier_coverage_1/group_std_mean": 0.2684360921382904, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002561466954648495, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002561466954648495, "signal/frontier_coverage_10/centered_abs_mean": 0.20491735637187958, "signal/frontier_coverage_10/group_bin_occupancy": 0.8486111111111111, "signal/frontier_coverage_10/group_std_mean": 0.2684360921382904, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002561466954648495, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002561466954648495, "signal/frontier_coverage_15/centered_abs_mean": 0.20491735637187958, "signal/frontier_coverage_15/group_bin_occupancy": 0.8486111111111111, "signal/frontier_coverage_15/group_std_mean": 0.2684360921382904, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002561466954648495, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002561466954648495, "signal/frontier_coverage_20/centered_abs_mean": 0.20491735637187958, "signal/frontier_coverage_20/group_bin_occupancy": 0.8486111111111111, "signal/frontier_coverage_20/group_std_mean": 0.2684360921382904, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002561466954648495, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002561466954648495, "signal/frontier_coverage_25/centered_abs_mean": 0.20491735637187958, "signal/frontier_coverage_25/group_bin_occupancy": 0.8486111111111111, "signal/frontier_coverage_25/group_std_mean": 0.2684360921382904, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002561466954648495, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002561466954648495, "signal/frontier_coverage_5/centered_abs_mean": 0.20491735637187958, "signal/frontier_coverage_5/group_bin_occupancy": 0.8486111111111111, "signal/frontier_coverage_5/group_std_mean": 0.2684360921382904, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002561466954648495, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002561466954648495, "signal/frontier_ece_reward/centered_abs_mean": 0.037827713042497636, "signal/frontier_ece_reward/group_bin_occupancy": 0.8420138888888887, "signal/frontier_ece_reward/group_std_mean": 0.05301511362195015, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003782771248370409, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003782771248370409, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3126159429550171, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7684027777777778, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3858396053314209, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031261596083641055, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031261596083641055, "step": 100 }, { "epoch": 0.23999700003749952, "eval_calibration/aurc": 0.16370137027749238, "eval_calibration/batch_distribution_entropy": 0.9336636571209378, "eval_calibration/batch_entropy_100bins": 0.7040574498413844, "eval_calibration/batch_entropy_10bins": 0.9336636571209378, "eval_calibration/batch_entropy_50bins": 0.7745602196729959, "eval_calibration/batch_uniqueness": 0.8935712853364551, "eval_calibration/buffer_distribution_entropy": 0.9176718551760042, "eval_calibration/buffer_entropy_100bins": 0.905262358950738, "eval_calibration/buffer_entropy_10bins": 0.9176718551760042, "eval_calibration/buffer_entropy_50bins": 0.9244583301099669, "eval_calibration/confidence_entropy": 0.4785545865961936, "eval_calibration/coverage@0%": 0.2054771505376344, "eval_calibration/coverage@1%": 0.2054771505376344, "eval_calibration/coverage@10%": 0.3649193548387097, "eval_calibration/coverage@15%": 0.5431787634408602, "eval_calibration/coverage@20%": 0.710853494623656, "eval_calibration/coverage@25%": 0.8108198924731184, "eval_calibration/coverage@30%": 0.9260752688172044, "eval_calibration/coverage@5%": 0.25924059139784944, "eval_calibration/ece": 0.19514030398666016, "eval_calibration/mean_confidence": 0.5611983746023261, "eval_calibration/prompt_uniqueness": 0.8935712853364551, "eval_completions/clipped_ratio": 0.013020833333333334, "eval_completions/max_length": 2318.1666666666665, "eval_completions/max_terminated_length": 2318.1666666666665, "eval_completions/mean_length": 748.2187703450521, "eval_completions/mean_terminated_length": 758.1032104492188, "eval_completions/min_length": 123.0, "eval_completions/min_terminated_length": 307.6666666666667, "eval_loss": 0.0, "eval_num_tokens": 214260467.0, "eval_reward": 0.8898186484972636, "eval_reward_std": 0.2399557630221049, "eval_rewards/accuracy_reward": 0.6519097288449606, "eval_rewards/brier_reward": 0.7833640774091085, "eval_rewards/confidence_uniqueness_reward": 0.8799956142902374, "eval_rewards/format_reward": 0.9852430522441864, "eval_rewards/frontier_aurc_reward": -0.0014197090058587492, "eval_rewards/frontier_coverage_0": 0.02340823287765185, "eval_rewards/frontier_coverage_1": 0.02340823287765185, "eval_rewards/frontier_coverage_10": 0.02340823287765185, "eval_rewards/frontier_coverage_15": 0.02340823287765185, "eval_rewards/frontier_coverage_20": 0.02340823287765185, "eval_rewards/frontier_coverage_25": 0.02340823287765185, "eval_rewards/frontier_coverage_5": 0.02340823287765185, "eval_rewards/frontier_ece_reward": 0.014000983831162253, "eval_rewards/frontier_entropy_batch_reward": -0.9852430522441864, "eval_runtime": 193.0887, "eval_samples_per_second": 5.179, "eval_signal/accuracy_reward/centered_abs_mean": 0.4361436615387599, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4730866402387619, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21807183076937994, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21807183076937994, "eval_signal/advantage_abs_mean": 0.2088108335932096, "eval_signal/advantage_pre_scale_abs_mean": 0.2088108335932096, "eval_signal/advantage_pre_scale_std": 0.23898746818304062, "eval_signal/advantage_std": 0.23898746818304062, "eval_signal/brier_reward/centered_abs_mean": 0.2062627375125885, "eval_signal/brier_reward/group_bin_occupancy": 0.8611111111111112, "eval_signal/brier_reward/group_std_mean": 0.2609405269225438, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020626275179286797, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.020626275179286797, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05856152934332689, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.40277777777777773, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09981899770597617, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005856153244773547, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005856153244773547, "eval_signal/format_reward/centered_abs_mean": 0.028049044621487457, "eval_signal/format_reward/group_bin_occupancy": 0.17013888888888887, "eval_signal/format_reward/group_std_mean": 0.07099391147494316, "eval_signal/format_reward/group_zero_std_frac": 0.638888900478681, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.014024522310743729, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.014024522310743729, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0020108624982337155, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.670138888888889, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0036396855721250176, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5135781167288467e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5135781167288467e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.2666911060611407, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.8993055555555555, "eval_signal/frontier_coverage_0/group_std_mean": 0.37884485224882763, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003333638849047323, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003333638849047323, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.2666911060611407, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.8993055555555555, "eval_signal/frontier_coverage_1/group_std_mean": 0.37884485224882763, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003333638849047323, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003333638849047323, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.2666911060611407, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.8993055555555555, "eval_signal/frontier_coverage_10/group_std_mean": 0.37884485224882763, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003333638849047323, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003333638849047323, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.2666911060611407, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.8993055555555555, "eval_signal/frontier_coverage_15/group_std_mean": 0.37884485224882763, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003333638849047323, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003333638849047323, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.2666911060611407, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.8993055555555555, "eval_signal/frontier_coverage_20/group_std_mean": 0.37884485224882763, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003333638849047323, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003333638849047323, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.2666911060611407, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.8993055555555555, "eval_signal/frontier_coverage_25/group_std_mean": 0.37884485224882763, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003333638849047323, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003333638849047323, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.2666911060611407, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.8993055555555555, "eval_signal/frontier_coverage_5/group_std_mean": 0.37884485224882763, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003333638849047323, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003333638849047323, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.041228462010622025, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.875, "eval_signal/frontier_ece_reward/group_std_mean": 0.05814546967546145, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0041228463329995675, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0041228463329995675, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.028049044621487457, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.17013888888888887, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.07099391147494316, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.638888900478681, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0028049046328912177, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0028049046328912177, "eval_steps_per_second": 0.031, "step": 100 }, { "calibration/aurc": 0.29434606104219985, "calibration/batch_distribution_entropy": 0.9775606413732308, "calibration/batch_entropy_100bins": 0.9610723412250273, "calibration/batch_entropy_10bins": 0.9775606413732308, "calibration/batch_entropy_50bins": 0.9716482303553293, "calibration/batch_uniqueness": 0.9517786214609512, "calibration/buffer_distribution_entropy": 0.9202256372529669, "calibration/buffer_entropy_100bins": 0.9085458764385524, "calibration/buffer_entropy_10bins": 0.9202256372529669, "calibration/buffer_entropy_50bins": 0.9270568801851861, "calibration/confidence_entropy": 0.4876244774879632, "calibration/coverage@0%": 0.07971241988213293, "calibration/coverage@1%": 0.08024433477574996, "calibration/coverage@10%": 0.15648546952752299, "calibration/coverage@15%": 0.21573857236440247, "calibration/coverage@20%": 0.2910429458868611, "calibration/coverage@25%": 0.3874947635370921, "calibration/coverage@30%": 0.48382004564167536, "calibration/coverage@5%": 0.12439327094596273, "calibration/ece": 0.17036784676882605, "calibration/mean_confidence": 0.5628817806849853, "calibration/prompt_uniqueness": 0.8609082618643228, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017708333333333326, "completions/max_length": 3732.2, "completions/max_terminated_length": 3732.2, "completions/mean_length": 744.0646850585938, "completions/mean_terminated_length": 757.5498779296875, "completions/min_length": 0.0, "completions/min_terminated_length": 238.8, "epoch": 0.2519968500393745, "grad_norm": 0.0006606943206861615, "learning_rate": 3.1024096385542172e-06, "loss": -0.0156, "num_tokens": 225908956.0, "reward": 0.9728375434875488, "reward_std": 0.13976509720087052, "rewards/accuracy_reward": 0.6686632037162781, "rewards/brier_reward": 0.7778706073760986, "rewards/confidence_uniqueness_reward": 0.9337520241737366, "rewards/format_reward": 0.9821180582046509, "rewards/frontier_aurc_reward": -0.0013738935464061796, "rewards/frontier_coverage_0": 0.008083556871861219, "rewards/frontier_coverage_1": 0.008083556871861219, "rewards/frontier_coverage_10": 0.008083556871861219, "rewards/frontier_coverage_15": 0.008083556871861219, "rewards/frontier_coverage_20": 0.008083556871861219, "rewards/frontier_coverage_25": 0.008083556871861219, "rewards/frontier_coverage_5": 0.008083556871861219, "rewards/frontier_ece_reward": 0.011097485572099686, "rewards/frontier_entropy_batch_reward": -0.25515236556529997, "signal/accuracy_reward/centered_abs_mean": 0.16565212607383728, "signal/accuracy_reward/group_bin_occupancy": 0.19930555555555557, "signal/accuracy_reward/group_std_mean": 0.21403219997882844, "signal/accuracy_reward/group_zero_std_frac": 0.4055555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08282606303691864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08282606303691864, "signal/advantage_abs_mean": 0.1027535393834114, "signal/advantage_pre_scale_abs_mean": 0.1027535393834114, "signal/advantage_pre_scale_std": 0.16651992797851561, "signal/advantage_std": 0.16651992797851561, "signal/brier_reward/centered_abs_mean": 0.1597402274608612, "signal/brier_reward/group_bin_occupancy": 0.845486111111111, "signal/brier_reward/group_std_mean": 0.20388856828212737, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01597402263432741, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01597402263432741, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.039262811094522475, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7920138888888889, "signal/confidence_uniqueness_reward/group_std_mean": 0.06549572870135308, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003926281165331602, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003926281165331602, "signal/format_reward/centered_abs_mean": 0.02922091968357563, "signal/format_reward/group_bin_occupancy": 0.15173611111111113, "signal/format_reward/group_std_mean": 0.05369042381644249, "signal/format_reward/group_zero_std_frac": 0.7861111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014610459841787816, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014610459841787816, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014508004300296307, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6725694444444443, "signal/frontier_aurc_reward/group_std_mean": 0.0023758172057569025, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8135006212105508e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8135006212105508e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2051199734210968, "signal/frontier_coverage_0/group_bin_occupancy": 0.85625, "signal/frontier_coverage_0/group_std_mean": 0.2649015933275223, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025639997329562902, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025639997329562902, "signal/frontier_coverage_1/centered_abs_mean": 0.2051199734210968, "signal/frontier_coverage_1/group_bin_occupancy": 0.85625, "signal/frontier_coverage_1/group_std_mean": 0.2649015933275223, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025639997329562902, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025639997329562902, "signal/frontier_coverage_10/centered_abs_mean": 0.2051199734210968, "signal/frontier_coverage_10/group_bin_occupancy": 0.85625, "signal/frontier_coverage_10/group_std_mean": 0.2649015933275223, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025639997329562902, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025639997329562902, "signal/frontier_coverage_15/centered_abs_mean": 0.2051199734210968, "signal/frontier_coverage_15/group_bin_occupancy": 0.85625, "signal/frontier_coverage_15/group_std_mean": 0.2649015933275223, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025639997329562902, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025639997329562902, "signal/frontier_coverage_20/centered_abs_mean": 0.2051199734210968, "signal/frontier_coverage_20/group_bin_occupancy": 0.85625, "signal/frontier_coverage_20/group_std_mean": 0.2649015933275223, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025639997329562902, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025639997329562902, "signal/frontier_coverage_25/centered_abs_mean": 0.2051199734210968, "signal/frontier_coverage_25/group_bin_occupancy": 0.85625, "signal/frontier_coverage_25/group_std_mean": 0.2649015933275223, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025639997329562902, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025639997329562902, "signal/frontier_coverage_5/centered_abs_mean": 0.2051199734210968, "signal/frontier_coverage_5/group_bin_occupancy": 0.85625, "signal/frontier_coverage_5/group_std_mean": 0.2649015933275223, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025639997329562902, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025639997329562902, "signal/frontier_ece_reward/centered_abs_mean": 0.03703099712729454, "signal/frontier_ece_reward/group_bin_occupancy": 0.8190972222222224, "signal/frontier_ece_reward/group_std_mean": 0.051789505034685136, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0037030997220426796, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0037030997220426796, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31766087412834165, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.757638888888889, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3910989761352539, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03176608793437481, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03176608793437481, "step": 105 }, { "calibration/aurc": 0.19799323292635163, "calibration/batch_distribution_entropy": 0.9537625669630876, "calibration/batch_entropy_100bins": 0.9453870980796439, "calibration/batch_entropy_10bins": 0.9537625669630876, "calibration/batch_entropy_50bins": 0.9547702004879739, "calibration/batch_uniqueness": 0.9466343035079527, "calibration/buffer_distribution_entropy": 0.9240580653843992, "calibration/buffer_entropy_100bins": 0.9147163586526406, "calibration/buffer_entropy_10bins": 0.9240580653843992, "calibration/buffer_entropy_50bins": 0.9315463111926698, "calibration/confidence_entropy": 0.4835072146320634, "calibration/coverage@0%": 0.035680901908089514, "calibration/coverage@1%": 0.035680901908089514, "calibration/coverage@10%": 0.24178913328134474, "calibration/coverage@15%": 0.31315851191612476, "calibration/coverage@20%": 0.4733090167170541, "calibration/coverage@25%": 0.7230019075321643, "calibration/coverage@30%": 0.8897233502388046, "calibration/coverage@5%": 0.08753672359530008, "calibration/ece": 0.14042280691981804, "calibration/mean_confidence": 0.6066246764748546, "calibration/prompt_uniqueness": 0.8641427667407416, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016232638888888908, "completions/max_length": 3394.8, "completions/max_terminated_length": 3394.8, "completions/mean_length": 746.4253662109375, "completions/mean_terminated_length": 758.7831420898438, "completions/min_length": 0.0, "completions/min_terminated_length": 249.6, "epoch": 0.2639967000412495, "grad_norm": 0.0003848325868602842, "learning_rate": 2.9518072289156627e-06, "loss": -0.0136, "num_tokens": 237616224.0, "reward": 0.9861885070800781, "reward_std": 0.13821190893650054, "rewards/accuracy_reward": 0.7033854126930237, "rewards/brier_reward": 0.7831946849822998, "rewards/confidence_uniqueness_reward": 0.9336396098136902, "rewards/format_reward": 0.9835069417953491, "rewards/frontier_aurc_reward": -0.0013050575507804751, "rewards/frontier_coverage_0": -0.011092400312190876, "rewards/frontier_coverage_1": -0.011092400312190876, "rewards/frontier_coverage_10": -0.011092400312190876, "rewards/frontier_coverage_15": -0.011092400312190876, "rewards/frontier_coverage_20": -0.011092400312190876, "rewards/frontier_coverage_25": -0.011092400312190876, "rewards/frontier_coverage_5": -0.011092400312190876, "rewards/frontier_ece_reward": 0.008409860450774432, "rewards/frontier_entropy_batch_reward": -0.2879524528980255, "signal/accuracy_reward/centered_abs_mean": 0.15787217915058135, "signal/accuracy_reward/group_bin_occupancy": 0.20694444444444446, "signal/accuracy_reward/group_std_mean": 0.21840295791625977, "signal/accuracy_reward/group_zero_std_frac": 0.3444444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07893608957529068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07893608957529068, "signal/advantage_abs_mean": 0.09958325326442719, "signal/advantage_pre_scale_abs_mean": 0.09958325326442719, "signal/advantage_pre_scale_std": 0.16226766407489776, "signal/advantage_std": 0.16226766407489776, "signal/brier_reward/centered_abs_mean": 0.15236919820308686, "signal/brier_reward/group_bin_occupancy": 0.8368055555555556, "signal/brier_reward/group_std_mean": 0.19616940319538118, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01523692011833191, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01523692011833191, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03703809753060341, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8079861111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.06205111965537071, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037038099486380815, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037038099486380815, "signal/format_reward/centered_abs_mean": 0.02634548582136631, "signal/format_reward/group_bin_occupancy": 0.15104166666666669, "signal/format_reward/group_std_mean": 0.049515650421380994, "signal/format_reward/group_zero_std_frac": 0.7916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013172742910683155, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013172742910683155, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014365239767357707, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6795138888888889, "signal/frontier_aurc_reward/group_std_mean": 0.0023579075932502747, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7956549891096073e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7956549891096073e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1875341057777405, "signal/frontier_coverage_0/group_bin_occupancy": 0.8322916666666667, "signal/frontier_coverage_0/group_std_mean": 0.24931617081165314, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002344176359474659, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002344176359474659, "signal/frontier_coverage_1/centered_abs_mean": 0.1875341057777405, "signal/frontier_coverage_1/group_bin_occupancy": 0.8322916666666667, "signal/frontier_coverage_1/group_std_mean": 0.24931617081165314, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002344176359474659, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002344176359474659, "signal/frontier_coverage_10/centered_abs_mean": 0.1875341057777405, "signal/frontier_coverage_10/group_bin_occupancy": 0.8322916666666667, "signal/frontier_coverage_10/group_std_mean": 0.24931617081165314, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002344176359474659, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002344176359474659, "signal/frontier_coverage_15/centered_abs_mean": 0.1875341057777405, "signal/frontier_coverage_15/group_bin_occupancy": 0.8322916666666667, "signal/frontier_coverage_15/group_std_mean": 0.24931617081165314, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002344176359474659, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002344176359474659, "signal/frontier_coverage_20/centered_abs_mean": 0.1875341057777405, "signal/frontier_coverage_20/group_bin_occupancy": 0.8322916666666667, "signal/frontier_coverage_20/group_std_mean": 0.24931617081165314, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002344176359474659, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002344176359474659, "signal/frontier_coverage_25/centered_abs_mean": 0.1875341057777405, "signal/frontier_coverage_25/group_bin_occupancy": 0.8322916666666667, "signal/frontier_coverage_25/group_std_mean": 0.24931617081165314, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002344176359474659, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002344176359474659, "signal/frontier_coverage_5/centered_abs_mean": 0.1875341057777405, "signal/frontier_coverage_5/group_bin_occupancy": 0.8322916666666667, "signal/frontier_coverage_5/group_std_mean": 0.24931617081165314, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002344176359474659, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002344176359474659, "signal/frontier_ece_reward/centered_abs_mean": 0.034382133185863493, "signal/frontier_ece_reward/group_bin_occupancy": 0.8138888888888889, "signal/frontier_ece_reward/group_std_mean": 0.049156392365694045, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0034382133278995754, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0034382133278995754, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33333885073661806, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7628472222222222, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4037556827068329, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03333388455212116, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03333388455212116, "step": 110 }, { "calibration/aurc": 0.287285473949784, "calibration/batch_distribution_entropy": 0.9682973508384333, "calibration/batch_entropy_100bins": 0.9555438616131868, "calibration/batch_entropy_10bins": 0.9682973508384333, "calibration/batch_entropy_50bins": 0.9667426257666565, "calibration/batch_uniqueness": 0.9504674113351846, "calibration/buffer_distribution_entropy": 0.9277634548376474, "calibration/buffer_entropy_100bins": 0.9200476103891649, "calibration/buffer_entropy_10bins": 0.9277634548376474, "calibration/buffer_entropy_50bins": 0.9356148829884694, "calibration/confidence_entropy": 0.5307235518352804, "calibration/coverage@0%": 0.004751471720715325, "calibration/coverage@1%": 0.004751471720715325, "calibration/coverage@10%": 0.052850640633563484, "calibration/coverage@15%": 0.2552780215859444, "calibration/coverage@20%": 0.41941420756977194, "calibration/coverage@25%": 0.5522348050540486, "calibration/coverage@30%": 0.6566724394626509, "calibration/coverage@5%": 0.004751471720715325, "calibration/ece": 0.1892431956188166, "calibration/mean_confidence": 0.5415657741341013, "calibration/prompt_uniqueness": 0.8632119764295343, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016232638888888908, "completions/max_length": 3814.8, "completions/max_terminated_length": 3814.8, "completions/mean_length": 740.032568359375, "completions/mean_terminated_length": 752.2534545898437, "completions/min_length": 0.0, "completions/min_terminated_length": 238.6, "epoch": 0.27599655004312446, "grad_norm": 0.00034388055792078376, "learning_rate": 2.8012048192771087e-06, "loss": -0.0136, "num_tokens": 249220599.0, "reward": 0.9712253212928772, "reward_std": 0.13757235705852508, "rewards/accuracy_reward": 0.6709201455116272, "rewards/brier_reward": 0.7752394318580628, "rewards/confidence_uniqueness_reward": 0.933931851387024, "rewards/format_reward": 0.98359375, "rewards/frontier_aurc_reward": -0.0014276995789259672, "rewards/frontier_coverage_0": 0.0024690252728760244, "rewards/frontier_coverage_1": 0.0024690252728760244, "rewards/frontier_coverage_10": 0.0024690252728760244, "rewards/frontier_coverage_15": 0.0024690252728760244, "rewards/frontier_coverage_20": 0.0024690252728760244, "rewards/frontier_coverage_25": 0.0024690252728760244, "rewards/frontier_coverage_5": 0.0024690252728760244, "rewards/frontier_ece_reward": 0.00639819772914052, "rewards/frontier_entropy_batch_reward": -0.2778678983449936, "signal/accuracy_reward/centered_abs_mean": 0.1559516042470932, "signal/accuracy_reward/group_bin_occupancy": 0.19861111111111113, "signal/accuracy_reward/group_std_mean": 0.20581479370594025, "signal/accuracy_reward/group_zero_std_frac": 0.4111111104488373, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0779758021235466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0779758021235466, "signal/advantage_abs_mean": 0.09996644854545593, "signal/advantage_pre_scale_abs_mean": 0.09996644854545593, "signal/advantage_pre_scale_std": 0.16126873791217805, "signal/advantage_std": 0.16126873791217805, "signal/brier_reward/centered_abs_mean": 0.15016718208789825, "signal/brier_reward/group_bin_occupancy": 0.8506944444444444, "signal/brier_reward/group_std_mean": 0.19337638914585115, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015016718581318856, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015016718581318856, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03684631027281284, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7972222222222223, "signal/confidence_uniqueness_reward/group_std_mean": 0.06412606909871102, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003684631362557411, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003684631362557411, "signal/format_reward/centered_abs_mean": 0.02667643241584301, "signal/format_reward/group_bin_occupancy": 0.15243055555555557, "signal/format_reward/group_std_mean": 0.05222913697361946, "signal/format_reward/group_zero_std_frac": 0.7805555582046508, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013338216207921505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013338216207921505, "signal/frontier_aurc_reward/centered_abs_mean": 0.001323050889186561, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6850694444444445, "signal/frontier_aurc_reward/group_std_mean": 0.0021448110230267046, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6538135969312862e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6538135969312862e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18924466371536255, "signal/frontier_coverage_0/group_bin_occupancy": 0.8597222222222223, "signal/frontier_coverage_0/group_std_mean": 0.24632638990879058, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023655582685023547, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023655582685023547, "signal/frontier_coverage_1/centered_abs_mean": 0.18924466371536255, "signal/frontier_coverage_1/group_bin_occupancy": 0.8597222222222223, "signal/frontier_coverage_1/group_std_mean": 0.24632638990879058, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023655582685023547, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023655582685023547, "signal/frontier_coverage_10/centered_abs_mean": 0.18924466371536255, "signal/frontier_coverage_10/group_bin_occupancy": 0.8597222222222223, "signal/frontier_coverage_10/group_std_mean": 0.24632638990879058, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023655582685023547, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023655582685023547, "signal/frontier_coverage_15/centered_abs_mean": 0.18924466371536255, "signal/frontier_coverage_15/group_bin_occupancy": 0.8597222222222223, "signal/frontier_coverage_15/group_std_mean": 0.24632638990879058, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023655582685023547, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023655582685023547, "signal/frontier_coverage_20/centered_abs_mean": 0.18924466371536255, "signal/frontier_coverage_20/group_bin_occupancy": 0.8597222222222223, "signal/frontier_coverage_20/group_std_mean": 0.24632638990879058, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023655582685023547, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023655582685023547, "signal/frontier_coverage_25/centered_abs_mean": 0.18924466371536255, "signal/frontier_coverage_25/group_bin_occupancy": 0.8597222222222223, "signal/frontier_coverage_25/group_std_mean": 0.24632638990879058, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023655582685023547, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023655582685023547, "signal/frontier_coverage_5/centered_abs_mean": 0.18924466371536255, "signal/frontier_coverage_5/group_bin_occupancy": 0.8597222222222223, "signal/frontier_coverage_5/group_std_mean": 0.24632638990879058, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023655582685023547, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023655582685023547, "signal/frontier_ece_reward/centered_abs_mean": 0.03106148950755596, "signal/frontier_ece_reward/group_bin_occupancy": 0.8159722222222221, "signal/frontier_ece_reward/group_std_mean": 0.043675854057073596, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0031061490997672083, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0031061490997672083, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3323960185050964, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.759375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4029895007610321, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03323960341513157, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03323960341513157, "step": 115 }, { "calibration/aurc": 0.2857290173805331, "calibration/batch_distribution_entropy": 0.9630372236339821, "calibration/batch_entropy_100bins": 0.9529791109323151, "calibration/batch_entropy_10bins": 0.9630372236339821, "calibration/batch_entropy_50bins": 0.9638178813465694, "calibration/batch_uniqueness": 0.9493225460778467, "calibration/buffer_distribution_entropy": 0.932573310927301, "calibration/buffer_entropy_100bins": 0.9254646116102905, "calibration/buffer_entropy_10bins": 0.932573310927301, "calibration/buffer_entropy_50bins": 0.9401181111666196, "calibration/confidence_entropy": 0.5012522680111101, "calibration/coverage@0%": 0.010983857117787824, "calibration/coverage@1%": 0.010983857117787824, "calibration/coverage@10%": 0.16000526703945886, "calibration/coverage@15%": 0.3665599237179272, "calibration/coverage@20%": 0.5034456617136492, "calibration/coverage@25%": 0.5335280661718852, "calibration/coverage@30%": 0.5718207547816314, "calibration/coverage@5%": 0.015683596021182085, "calibration/ece": 0.19073780715158475, "calibration/mean_confidence": 0.583309509564421, "calibration/prompt_uniqueness": 0.8659640504197897, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011284722222222255, "completions/max_length": 3415.4, "completions/max_terminated_length": 3415.4, "completions/mean_length": 733.89296875, "completions/mean_terminated_length": 742.2210571289063, "completions/min_length": 0.0, "completions/min_terminated_length": 258.8, "epoch": 0.28799640004499943, "grad_norm": 0.0003453529498074204, "learning_rate": 2.6506024096385547e-06, "loss": -0.0095, "num_tokens": 260756902.0, "reward": 0.9813999176025391, "reward_std": 0.137198106944561, "rewards/accuracy_reward": 0.6784722208976746, "rewards/brier_reward": 0.7936996936798095, "rewards/confidence_uniqueness_reward": 0.938444995880127, "rewards/format_reward": 0.9886284828186035, "rewards/frontier_aurc_reward": -0.0014351831981912256, "rewards/frontier_coverage_0": 0.012847778201103211, "rewards/frontier_coverage_1": 0.012847778201103211, "rewards/frontier_coverage_10": 0.012847778201103211, "rewards/frontier_coverage_15": 0.012847778201103211, "rewards/frontier_coverage_20": 0.012847778201103211, "rewards/frontier_coverage_25": 0.012847778201103211, "rewards/frontier_coverage_5": 0.012847778201103211, "rewards/frontier_ece_reward": 0.009404824767261743, "rewards/frontier_entropy_batch_reward": -0.2741163432598114, "signal/accuracy_reward/centered_abs_mean": 0.1679144948720932, "signal/accuracy_reward/group_bin_occupancy": 0.2034722222222222, "signal/accuracy_reward/group_std_mean": 0.22080156803131104, "signal/accuracy_reward/group_zero_std_frac": 0.37222222089767454, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0839572474360466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0839572474360466, "signal/advantage_abs_mean": 0.10121523141860962, "signal/advantage_pre_scale_abs_mean": 0.10121523141860962, "signal/advantage_pre_scale_std": 0.16033710837364196, "signal/advantage_std": 0.16033710837364196, "signal/brier_reward/centered_abs_mean": 0.14215776324272156, "signal/brier_reward/group_bin_occupancy": 0.8395833333333332, "signal/brier_reward/group_std_mean": 0.18405098021030425, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014215776138007641, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014215776138007641, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.031229404360055925, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.820486111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.05529859885573387, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003122940473258495, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003122940473258495, "signal/format_reward/centered_abs_mean": 0.020024956576526164, "signal/format_reward/group_bin_occupancy": 0.14861111111111108, "signal/format_reward/group_std_mean": 0.04193191379308701, "signal/format_reward/group_zero_std_frac": 0.8111111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010012478288263082, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010012478288263082, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015335174975916743, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6753472222222221, "signal/frontier_aurc_reward/group_std_mean": 0.0024902403354644777, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.916896871989593e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.916896871989593e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17784596085548401, "signal/frontier_coverage_0/group_bin_occupancy": 0.8430555555555556, "signal/frontier_coverage_0/group_std_mean": 0.23407686352729798, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022230746224522592, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022230746224522592, "signal/frontier_coverage_1/centered_abs_mean": 0.17784596085548401, "signal/frontier_coverage_1/group_bin_occupancy": 0.8430555555555556, "signal/frontier_coverage_1/group_std_mean": 0.23407686352729798, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022230746224522592, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022230746224522592, "signal/frontier_coverage_10/centered_abs_mean": 0.17784596085548401, "signal/frontier_coverage_10/group_bin_occupancy": 0.8430555555555556, "signal/frontier_coverage_10/group_std_mean": 0.23407686352729798, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022230746224522592, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022230746224522592, "signal/frontier_coverage_15/centered_abs_mean": 0.17784596085548401, "signal/frontier_coverage_15/group_bin_occupancy": 0.8430555555555556, "signal/frontier_coverage_15/group_std_mean": 0.23407686352729798, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022230746224522592, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022230746224522592, "signal/frontier_coverage_20/centered_abs_mean": 0.17784596085548401, "signal/frontier_coverage_20/group_bin_occupancy": 0.8430555555555556, "signal/frontier_coverage_20/group_std_mean": 0.23407686352729798, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022230746224522592, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022230746224522592, "signal/frontier_coverage_25/centered_abs_mean": 0.17784596085548401, "signal/frontier_coverage_25/group_bin_occupancy": 0.8430555555555556, "signal/frontier_coverage_25/group_std_mean": 0.23407686352729798, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022230746224522592, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022230746224522592, "signal/frontier_coverage_5/centered_abs_mean": 0.17784596085548401, "signal/frontier_coverage_5/group_bin_occupancy": 0.8430555555555556, "signal/frontier_coverage_5/group_std_mean": 0.23407686352729798, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022230746224522592, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022230746224522592, "signal/frontier_ece_reward/centered_abs_mean": 0.03231002166867256, "signal/frontier_ece_reward/group_bin_occupancy": 0.8104166666666668, "signal/frontier_ece_reward/group_std_mean": 0.04614760801196098, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032310022041201593, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032310022041201593, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3172143340110779, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38740280270576477, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03172143436968326, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03172143436968326, "step": 120 }, { "calibration/aurc": 0.1575635410107588, "calibration/batch_distribution_entropy": 0.9612510163398869, "calibration/batch_entropy_100bins": 0.9552380496503574, "calibration/batch_entropy_10bins": 0.9612510163398869, "calibration/batch_entropy_50bins": 0.9641717634207829, "calibration/batch_uniqueness": 0.949993854889337, "calibration/buffer_distribution_entropy": 0.9353462564740422, "calibration/buffer_entropy_100bins": 0.9296285255700507, "calibration/buffer_entropy_10bins": 0.9353462564740422, "calibration/buffer_entropy_50bins": 0.9432362064347739, "calibration/confidence_entropy": 0.5031603083743401, "calibration/coverage@0%": 0.13024148292317878, "calibration/coverage@1%": 0.1729498162565121, "calibration/coverage@10%": 0.3448486815291011, "calibration/coverage@15%": 0.44466883739799756, "calibration/coverage@20%": 0.6997469786467916, "calibration/coverage@25%": 0.8670727387361822, "calibration/coverage@30%": 0.9466780773070889, "calibration/coverage@5%": 0.2884318818261089, "calibration/ece": 0.17434127126435955, "calibration/mean_confidence": 0.5888708948278307, "calibration/prompt_uniqueness": 0.8624025232593183, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011024305555555558, "completions/max_length": 3571.4, "completions/max_terminated_length": 3571.4, "completions/mean_length": 738.0600708007812, "completions/mean_terminated_length": 746.339111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 242.0, "epoch": 0.2999962500468744, "grad_norm": 0.00036861843545921147, "learning_rate": 2.5e-06, "loss": -0.0104, "num_tokens": 272377018.0, "reward": 0.9875968217849731, "reward_std": 0.12884068638086318, "rewards/accuracy_reward": 0.6880208253860474, "rewards/brier_reward": 0.7970276832580566, "rewards/confidence_uniqueness_reward": 0.9396448731422424, "rewards/format_reward": 0.9886284708976746, "rewards/frontier_aurc_reward": -0.0011375241447240114, "rewards/frontier_coverage_0": 0.01211215639486909, "rewards/frontier_coverage_1": 0.01211215639486909, "rewards/frontier_coverage_10": 0.01211215639486909, "rewards/frontier_coverage_15": 0.01211215639486909, "rewards/frontier_coverage_20": 0.01211215639486909, "rewards/frontier_coverage_25": 0.01211215639486909, "rewards/frontier_coverage_5": 0.01211215639486909, "rewards/frontier_ece_reward": 0.008880946971476077, "rewards/frontier_entropy_batch_reward": -0.26328781247138977, "signal/accuracy_reward/centered_abs_mean": 0.1634006083011627, "signal/accuracy_reward/group_bin_occupancy": 0.1982638888888889, "signal/accuracy_reward/group_std_mean": 0.21180324256420135, "signal/accuracy_reward/group_zero_std_frac": 0.4138888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08170030415058135, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08170030415058135, "signal/advantage_abs_mean": 0.09549782574176788, "signal/advantage_pre_scale_abs_mean": 0.09549782574176788, "signal/advantage_pre_scale_std": 0.15226317346096038, "signal/advantage_std": 0.15226317346096038, "signal/brier_reward/centered_abs_mean": 0.14393920302391053, "signal/brier_reward/group_bin_occupancy": 0.8489583333333334, "signal/brier_reward/group_std_mean": 0.1842961460351944, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014393920078873634, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014393920078873634, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0306204479187727, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8493055555555555, "signal/confidence_uniqueness_reward/group_std_mean": 0.05007597878575325, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030620446428656577, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030620446428656577, "signal/format_reward/centered_abs_mean": 0.01981879323720932, "signal/format_reward/group_bin_occupancy": 0.14375, "signal/format_reward/group_std_mean": 0.037004124373197556, "signal/format_reward/group_zero_std_frac": 0.8499999880790711, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00990939661860466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00990939661860466, "signal/frontier_aurc_reward/centered_abs_mean": 0.001247124606743455, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6756944444444444, "signal/frontier_aurc_reward/group_std_mean": 0.002091983216814697, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.558905732963467e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.558905732963467e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1979488104581833, "signal/frontier_coverage_0/group_bin_occupancy": 0.8444444444444444, "signal/frontier_coverage_0/group_std_mean": 0.25512219667434693, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024743602611124516, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024743602611124516, "signal/frontier_coverage_1/centered_abs_mean": 0.1979488104581833, "signal/frontier_coverage_1/group_bin_occupancy": 0.8444444444444444, "signal/frontier_coverage_1/group_std_mean": 0.25512219667434693, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024743602611124516, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024743602611124516, "signal/frontier_coverage_10/centered_abs_mean": 0.1979488104581833, "signal/frontier_coverage_10/group_bin_occupancy": 0.8444444444444444, "signal/frontier_coverage_10/group_std_mean": 0.25512219667434693, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024743602611124516, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024743602611124516, "signal/frontier_coverage_15/centered_abs_mean": 0.1979488104581833, "signal/frontier_coverage_15/group_bin_occupancy": 0.8444444444444444, "signal/frontier_coverage_15/group_std_mean": 0.25512219667434693, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024743602611124516, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024743602611124516, "signal/frontier_coverage_20/centered_abs_mean": 0.1979488104581833, "signal/frontier_coverage_20/group_bin_occupancy": 0.8444444444444444, "signal/frontier_coverage_20/group_std_mean": 0.25512219667434693, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024743602611124516, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024743602611124516, "signal/frontier_coverage_25/centered_abs_mean": 0.1979488104581833, "signal/frontier_coverage_25/group_bin_occupancy": 0.8444444444444444, "signal/frontier_coverage_25/group_std_mean": 0.25512219667434693, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024743602611124516, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024743602611124516, "signal/frontier_coverage_5/centered_abs_mean": 0.1979488104581833, "signal/frontier_coverage_5/group_bin_occupancy": 0.8444444444444444, "signal/frontier_coverage_5/group_std_mean": 0.25512219667434693, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024743602611124516, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024743602611124516, "signal/frontier_ece_reward/centered_abs_mean": 0.032735417038202284, "signal/frontier_ece_reward/group_bin_occupancy": 0.8017361111111111, "signal/frontier_ece_reward/group_std_mean": 0.0460764616727829, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032735418528318403, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032735418528318403, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31857306957244874, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7579861111111111, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39109026789665224, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0318573072552681, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0318573072552681, "step": 125 }, { "calibration/aurc": 0.20780328574617007, "calibration/batch_distribution_entropy": 0.9582987992066305, "calibration/batch_entropy_100bins": 0.9515668397932107, "calibration/batch_entropy_10bins": 0.9582987992066305, "calibration/batch_entropy_50bins": 0.9617035518413081, "calibration/batch_uniqueness": 0.9484267124407983, "calibration/buffer_distribution_entropy": 0.9379300572922322, "calibration/buffer_entropy_100bins": 0.9333226128121981, "calibration/buffer_entropy_10bins": 0.9379300572922322, "calibration/buffer_entropy_50bins": 0.9460268584755, "calibration/confidence_entropy": 0.48645760072603716, "calibration/coverage@0%": 0.03253532995344596, "calibration/coverage@1%": 0.03253532995344596, "calibration/coverage@10%": 0.24292767469988902, "calibration/coverage@15%": 0.3801627540686855, "calibration/coverage@20%": 0.5748314186914957, "calibration/coverage@25%": 0.7332900747954306, "calibration/coverage@30%": 0.8432733894832689, "calibration/coverage@5%": 0.061231472001795406, "calibration/ece": 0.11196021170386979, "calibration/mean_confidence": 0.571842256393492, "calibration/prompt_uniqueness": 0.8573626933942176, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017100694444444443, "completions/max_length": 3418.2, "completions/max_terminated_length": 3418.2, "completions/mean_length": 769.5232666015625, "completions/mean_terminated_length": 782.9178833007812, "completions/min_length": 0.0, "completions/min_terminated_length": 225.6, "epoch": 0.3119961000487494, "grad_norm": 0.00037062042974866927, "learning_rate": 2.349397590361446e-06, "loss": -0.0142, "num_tokens": 284366726.0, "reward": 0.9679849982261658, "reward_std": 0.14072301387786865, "rewards/accuracy_reward": 0.659374988079071, "rewards/brier_reward": 0.7807927131652832, "rewards/confidence_uniqueness_reward": 0.9330769419670105, "rewards/format_reward": 0.9828993082046509, "rewards/frontier_aurc_reward": -0.0013563590357080102, "rewards/frontier_coverage_0": 0.017163947224617004, "rewards/frontier_coverage_1": 0.017163947224617004, "rewards/frontier_coverage_10": 0.017163947224617004, "rewards/frontier_coverage_15": 0.017163947224617004, "rewards/frontier_coverage_20": 0.017163947224617004, "rewards/frontier_coverage_25": 0.017163947224617004, "rewards/frontier_coverage_5": 0.017163947224617004, "rewards/frontier_ece_reward": 0.008727512508630752, "rewards/frontier_entropy_batch_reward": -0.26896790862083436, "signal/accuracy_reward/centered_abs_mean": 0.1755316823720932, "signal/accuracy_reward/group_bin_occupancy": 0.20868055555555554, "signal/accuracy_reward/group_std_mean": 0.23276266753673552, "signal/accuracy_reward/group_zero_std_frac": 0.3305555611848831, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0877658411860466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0877658411860466, "signal/advantage_abs_mean": 0.10393321365118027, "signal/advantage_pre_scale_abs_mean": 0.10393321365118027, "signal/advantage_pre_scale_std": 0.16393719911575316, "signal/advantage_std": 0.16393719911575316, "signal/brier_reward/centered_abs_mean": 0.14976280629634858, "signal/brier_reward/group_bin_occupancy": 0.8305555555555555, "signal/brier_reward/group_std_mean": 0.19258086383342743, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014976280741393566, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014976280741393566, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03748470433056354, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8097222222222221, "signal/confidence_uniqueness_reward/group_std_mean": 0.059673815965652466, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00374847031198442, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00374847031198442, "signal/format_reward/centered_abs_mean": 0.02657877579331398, "signal/format_reward/group_bin_occupancy": 0.1482638888888889, "signal/format_reward/group_std_mean": 0.04673202857375145, "signal/format_reward/group_zero_std_frac": 0.8138889074325562, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01328938789665699, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01328938789665699, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014108764240518211, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6774305555555555, "signal/frontier_aurc_reward/group_std_mean": 0.0023058691993355753, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7635955009609462e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7635955009609462e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20245161652565002, "signal/frontier_coverage_0/group_bin_occupancy": 0.8277777777777777, "signal/frontier_coverage_0/group_std_mean": 0.2637217164039612, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025306452065706254, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025306452065706254, "signal/frontier_coverage_1/centered_abs_mean": 0.20245161652565002, "signal/frontier_coverage_1/group_bin_occupancy": 0.8277777777777777, "signal/frontier_coverage_1/group_std_mean": 0.2637217164039612, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025306452065706254, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025306452065706254, "signal/frontier_coverage_10/centered_abs_mean": 0.20245161652565002, "signal/frontier_coverage_10/group_bin_occupancy": 0.8277777777777777, "signal/frontier_coverage_10/group_std_mean": 0.2637217164039612, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025306452065706254, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025306452065706254, "signal/frontier_coverage_15/centered_abs_mean": 0.20245161652565002, "signal/frontier_coverage_15/group_bin_occupancy": 0.8277777777777777, "signal/frontier_coverage_15/group_std_mean": 0.2637217164039612, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025306452065706254, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025306452065706254, "signal/frontier_coverage_20/centered_abs_mean": 0.20245161652565002, "signal/frontier_coverage_20/group_bin_occupancy": 0.8277777777777777, "signal/frontier_coverage_20/group_std_mean": 0.2637217164039612, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025306452065706254, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025306452065706254, "signal/frontier_coverage_25/centered_abs_mean": 0.20245161652565002, "signal/frontier_coverage_25/group_bin_occupancy": 0.8277777777777777, "signal/frontier_coverage_25/group_std_mean": 0.2637217164039612, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025306452065706254, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025306452065706254, "signal/frontier_coverage_5/centered_abs_mean": 0.20245161652565002, "signal/frontier_coverage_5/group_bin_occupancy": 0.8277777777777777, "signal/frontier_coverage_5/group_std_mean": 0.2637217164039612, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025306452065706254, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025306452065706254, "signal/frontier_ece_reward/centered_abs_mean": 0.03250915594398975, "signal/frontier_ece_reward/group_bin_occupancy": 0.8149305555555555, "signal/frontier_ece_reward/group_std_mean": 0.0450954794883728, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032509156968444585, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032509156968444585, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3180006206035614, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7440972222222222, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3872752785682678, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03180006295442581, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03180006295442581, "step": 130 }, { "calibration/aurc": 0.21932232197247176, "calibration/batch_distribution_entropy": 0.9258906597036723, "calibration/batch_entropy_100bins": 0.9298288127857933, "calibration/batch_entropy_10bins": 0.9258906597036723, "calibration/batch_entropy_50bins": 0.9390660590681558, "calibration/batch_uniqueness": 0.9410424613002439, "calibration/buffer_distribution_entropy": 0.9393241003274235, "calibration/buffer_entropy_100bins": 0.9364226957038404, "calibration/buffer_entropy_10bins": 0.9393241003274235, "calibration/buffer_entropy_50bins": 0.9481017899292319, "calibration/confidence_entropy": 0.4694250466975819, "calibration/coverage@0%": 0.02605259564249495, "calibration/coverage@1%": 0.06719842897582828, "calibration/coverage@10%": 0.22309573868711435, "calibration/coverage@15%": 0.28712263899767765, "calibration/coverage@20%": 0.4689698033900867, "calibration/coverage@25%": 0.6534364527418945, "calibration/coverage@30%": 0.7781708725802566, "calibration/coverage@5%": 0.1817817623091616, "calibration/ece": 0.13053299917973787, "calibration/mean_confidence": 0.6416413739982313, "calibration/prompt_uniqueness": 0.8535478696842471, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666675, "completions/max_length": 3378.2, "completions/max_terminated_length": 3378.2, "completions/mean_length": 756.0257080078125, "completions/mean_terminated_length": 763.93759765625, "completions/min_length": 0.0, "completions/min_terminated_length": 229.6, "epoch": 0.32399595005062437, "grad_norm": 0.0004128075379412621, "learning_rate": 2.1987951807228917e-06, "loss": -0.0091, "num_tokens": 296169166.0, "reward": 0.9857806205749512, "reward_std": 0.12979988008737564, "rewards/accuracy_reward": 0.6876736164093018, "rewards/brier_reward": 0.8059556722640991, "rewards/confidence_uniqueness_reward": 0.9373222827911377, "rewards/format_reward": 0.9894965291023254, "rewards/frontier_aurc_reward": -0.0013067800784483552, "rewards/frontier_coverage_0": 0.017236491234507413, "rewards/frontier_coverage_1": 0.017236491234507413, "rewards/frontier_coverage_10": 0.017236491234507413, "rewards/frontier_coverage_15": 0.017236491234507413, "rewards/frontier_coverage_20": 0.017236491234507413, "rewards/frontier_coverage_25": 0.017236491234507413, "rewards/frontier_coverage_5": 0.017236491234507413, "rewards/frontier_ece_reward": 0.009917940944433212, "rewards/frontier_entropy_batch_reward": -0.2961589753627777, "signal/accuracy_reward/centered_abs_mean": 0.16046006828546525, "signal/accuracy_reward/group_bin_occupancy": 0.2020833333333333, "signal/accuracy_reward/group_std_mean": 0.2138714611530304, "signal/accuracy_reward/group_zero_std_frac": 0.38333333730697633, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08023003414273262, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08023003414273262, "signal/advantage_abs_mean": 0.09574576169252395, "signal/advantage_pre_scale_abs_mean": 0.09574576169252395, "signal/advantage_pre_scale_std": 0.15280175805091858, "signal/advantage_std": 0.15280175805091858, "signal/brier_reward/centered_abs_mean": 0.13488745987415313, "signal/brier_reward/group_bin_occupancy": 0.8333333333333334, "signal/brier_reward/group_std_mean": 0.17542927265167235, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013488745875656604, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013488745875656604, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03078622967004776, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8409722222222223, "signal/confidence_uniqueness_reward/group_std_mean": 0.05095992609858513, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003078623116016388, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003078623116016388, "signal/format_reward/centered_abs_mean": 0.01808268241584301, "signal/format_reward/group_bin_occupancy": 0.14409722222222224, "signal/format_reward/group_std_mean": 0.03564814068377018, "signal/format_reward/group_zero_std_frac": 0.8472222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009041341207921504, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009041341207921504, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015255101257935165, "signal/frontier_aurc_reward/group_bin_occupancy": 0.673611111111111, "signal/frontier_aurc_reward/group_std_mean": 0.002493828348815441, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9068877008976415e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9068877008976415e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1737061321735382, "signal/frontier_coverage_0/group_bin_occupancy": 0.8340277777777777, "signal/frontier_coverage_0/group_std_mean": 0.23108671605587006, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002171326708048582, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002171326708048582, "signal/frontier_coverage_1/centered_abs_mean": 0.1737061321735382, "signal/frontier_coverage_1/group_bin_occupancy": 0.8340277777777777, "signal/frontier_coverage_1/group_std_mean": 0.23108671605587006, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002171326708048582, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002171326708048582, "signal/frontier_coverage_10/centered_abs_mean": 0.1737061321735382, "signal/frontier_coverage_10/group_bin_occupancy": 0.8340277777777777, "signal/frontier_coverage_10/group_std_mean": 0.23108671605587006, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002171326708048582, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002171326708048582, "signal/frontier_coverage_15/centered_abs_mean": 0.1737061321735382, "signal/frontier_coverage_15/group_bin_occupancy": 0.8340277777777777, "signal/frontier_coverage_15/group_std_mean": 0.23108671605587006, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002171326708048582, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002171326708048582, "signal/frontier_coverage_20/centered_abs_mean": 0.1737061321735382, "signal/frontier_coverage_20/group_bin_occupancy": 0.8340277777777777, "signal/frontier_coverage_20/group_std_mean": 0.23108671605587006, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002171326708048582, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002171326708048582, "signal/frontier_coverage_25/centered_abs_mean": 0.1737061321735382, "signal/frontier_coverage_25/group_bin_occupancy": 0.8340277777777777, "signal/frontier_coverage_25/group_std_mean": 0.23108671605587006, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002171326708048582, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002171326708048582, "signal/frontier_coverage_5/centered_abs_mean": 0.1737061321735382, "signal/frontier_coverage_5/group_bin_occupancy": 0.8340277777777777, "signal/frontier_coverage_5/group_std_mean": 0.23108671605587006, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002171326708048582, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002171326708048582, "signal/frontier_ece_reward/centered_abs_mean": 0.029922238364815713, "signal/frontier_ece_reward/group_bin_occupancy": 0.7881944444444444, "signal/frontier_ece_reward/group_std_mean": 0.042552655935287474, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0029922238551080226, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0029922238551080226, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3231747329235077, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7611111111111111, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3919511318206787, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03231747336685657, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03231747336685657, "step": 135 }, { "calibration/aurc": 0.13275932023850517, "calibration/batch_distribution_entropy": 0.9531096852111075, "calibration/batch_entropy_100bins": 0.9454761453048437, "calibration/batch_entropy_10bins": 0.9531096852111075, "calibration/batch_entropy_50bins": 0.9589858393962905, "calibration/batch_uniqueness": 0.9471837042566907, "calibration/buffer_distribution_entropy": 0.9434576189595149, "calibration/buffer_entropy_100bins": 0.9426846675065835, "calibration/buffer_entropy_10bins": 0.9434576189595149, "calibration/buffer_entropy_50bins": 0.9526372281432562, "calibration/confidence_entropy": 0.48488417640325493, "calibration/coverage@0%": 0.09119434414782376, "calibration/coverage@1%": 0.09119434414782376, "calibration/coverage@10%": 0.44083397809732333, "calibration/coverage@15%": 0.6239463372141985, "calibration/coverage@20%": 0.7766443894128876, "calibration/coverage@25%": 0.8689414115682064, "calibration/coverage@30%": 0.9499119261161872, "calibration/coverage@5%": 0.2181709186000814, "calibration/ece": 0.1283245177662963, "calibration/mean_confidence": 0.592435027573537, "calibration/prompt_uniqueness": 0.8549437105896963, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011284722222222232, "completions/max_length": 3542.4, "completions/max_terminated_length": 3542.4, "completions/mean_length": 756.4568603515625, "completions/mean_terminated_length": 765.0693115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 255.0, "epoch": 0.33599580005249935, "grad_norm": 0.0003396812826395035, "learning_rate": 2.0481927710843377e-06, "loss": -0.0098, "num_tokens": 307987773.0, "reward": 0.9830217599868775, "reward_std": 0.12660375535488128, "rewards/accuracy_reward": 0.682812488079071, "rewards/brier_reward": 0.7918254256248474, "rewards/confidence_uniqueness_reward": 0.9390221238136292, "rewards/format_reward": 0.9885416746139526, "rewards/frontier_aurc_reward": -0.0011309069814160466, "rewards/frontier_coverage_0": 0.0069188129156827925, "rewards/frontier_coverage_1": 0.0069188129156827925, "rewards/frontier_coverage_10": 0.0069188129156827925, "rewards/frontier_coverage_15": 0.0069188129156827925, "rewards/frontier_coverage_20": 0.0069188129156827925, "rewards/frontier_coverage_25": 0.0069188129156827925, "rewards/frontier_coverage_5": 0.0069188129156827925, "rewards/frontier_ece_reward": 0.006748666008934379, "rewards/frontier_entropy_batch_reward": -0.27006215155124663, "signal/accuracy_reward/centered_abs_mean": 0.14768880009651184, "signal/accuracy_reward/group_bin_occupancy": 0.1986111111111111, "signal/accuracy_reward/group_std_mean": 0.20093624889850617, "signal/accuracy_reward/group_zero_std_frac": 0.4111111104488373, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07384440004825592, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07384440004825592, "signal/advantage_abs_mean": 0.09231802374124527, "signal/advantage_pre_scale_abs_mean": 0.09231802374124527, "signal/advantage_pre_scale_std": 0.14954468309879304, "signal/advantage_std": 0.14954468309879304, "signal/brier_reward/centered_abs_mean": 0.13755186796188354, "signal/brier_reward/group_bin_occupancy": 0.8548611111111111, "signal/brier_reward/group_std_mean": 0.17639783918857574, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013755187578499316, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013755187578499316, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03110237456858158, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8395833333333333, "signal/confidence_uniqueness_reward/group_std_mean": 0.053076548129320146, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031102376524358988, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031102376524358988, "signal/format_reward/centered_abs_mean": 0.02012803815305233, "signal/format_reward/group_bin_occupancy": 0.14652777777777776, "signal/format_reward/group_std_mean": 0.04009459167718887, "signal/format_reward/group_zero_std_frac": 0.8277777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010064019076526164, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010064019076526164, "signal/frontier_aurc_reward/centered_abs_mean": 0.001249980158172548, "signal/frontier_aurc_reward/group_bin_occupancy": 0.69375, "signal/frontier_aurc_reward/group_std_mean": 0.0020357307279482485, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5624752268195153e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5624752268195153e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17975709438323975, "signal/frontier_coverage_0/group_bin_occupancy": 0.8354166666666666, "signal/frontier_coverage_0/group_std_mean": 0.23831536173820494, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022469636984169482, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022469636984169482, "signal/frontier_coverage_1/centered_abs_mean": 0.17975709438323975, "signal/frontier_coverage_1/group_bin_occupancy": 0.8354166666666666, "signal/frontier_coverage_1/group_std_mean": 0.23831536173820494, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022469636984169482, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022469636984169482, "signal/frontier_coverage_10/centered_abs_mean": 0.17975709438323975, "signal/frontier_coverage_10/group_bin_occupancy": 0.8354166666666666, "signal/frontier_coverage_10/group_std_mean": 0.23831536173820494, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022469636984169482, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022469636984169482, "signal/frontier_coverage_15/centered_abs_mean": 0.17975709438323975, "signal/frontier_coverage_15/group_bin_occupancy": 0.8354166666666666, "signal/frontier_coverage_15/group_std_mean": 0.23831536173820494, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022469636984169482, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022469636984169482, "signal/frontier_coverage_20/centered_abs_mean": 0.17975709438323975, "signal/frontier_coverage_20/group_bin_occupancy": 0.8354166666666666, "signal/frontier_coverage_20/group_std_mean": 0.23831536173820494, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022469636984169482, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022469636984169482, "signal/frontier_coverage_25/centered_abs_mean": 0.17975709438323975, "signal/frontier_coverage_25/group_bin_occupancy": 0.8354166666666666, "signal/frontier_coverage_25/group_std_mean": 0.23831536173820494, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022469636984169482, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022469636984169482, "signal/frontier_coverage_5/centered_abs_mean": 0.17975709438323975, "signal/frontier_coverage_5/group_bin_occupancy": 0.8354166666666666, "signal/frontier_coverage_5/group_std_mean": 0.23831536173820494, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022469636984169482, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022469636984169482, "signal/frontier_ece_reward/centered_abs_mean": 0.02870168685913086, "signal/frontier_ece_reward/group_bin_occupancy": 0.8017361111111111, "signal/frontier_ece_reward/group_std_mean": 0.04026328325271607, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0028701687697321177, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0028701687697321177, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32329471707344054, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7614583333333333, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3934563398361206, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03232947215437889, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03232947215437889, "step": 140 }, { "calibration/aurc": 0.17528265336611334, "calibration/batch_distribution_entropy": 0.9844332458404799, "calibration/batch_entropy_100bins": 0.9646424669896382, "calibration/batch_entropy_10bins": 0.9844332458404799, "calibration/batch_entropy_50bins": 0.9767757755701026, "calibration/batch_uniqueness": 0.9529407609544613, "calibration/buffer_distribution_entropy": 0.9548874553492734, "calibration/buffer_entropy_100bins": 0.9550800614273213, "calibration/buffer_entropy_10bins": 0.9548874553492734, "calibration/buffer_entropy_50bins": 0.9626071569576704, "calibration/confidence_entropy": 0.5026660064805346, "calibration/coverage@0%": 0.038237593621836374, "calibration/coverage@1%": 0.038237593621836374, "calibration/coverage@10%": 0.33759787892501547, "calibration/coverage@15%": 0.49998634037265716, "calibration/coverage@20%": 0.6393046017984111, "calibration/coverage@25%": 0.7433060670275571, "calibration/coverage@30%": 0.8282726914970058, "calibration/coverage@5%": 0.17173594361032143, "calibration/ece": 0.15985296898612406, "calibration/mean_confidence": 0.5081752724560155, "calibration/prompt_uniqueness": 0.8541487763434162, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009201388888888907, "completions/max_length": 3174.6, "completions/max_terminated_length": 3174.6, "completions/mean_length": 742.3033081054688, "completions/mean_terminated_length": 749.2716918945313, "completions/min_length": 0.0, "completions/min_terminated_length": 260.2, "epoch": 0.34799565005437433, "grad_norm": 0.0003378766414243728, "learning_rate": 1.8975903614457832e-06, "loss": -0.0075, "num_tokens": 319603715.0, "reward": 1.0006329894065857, "reward_std": 0.11812710911035537, "rewards/accuracy_reward": 0.710069453716278, "rewards/brier_reward": 0.7972426533699035, "rewards/confidence_uniqueness_reward": 0.9429156422615051, "rewards/format_reward": 0.9907118082046509, "rewards/frontier_aurc_reward": -0.001022504735738039, "rewards/frontier_coverage_0": -0.00022672154009342193, "rewards/frontier_coverage_1": -0.00022672154009342193, "rewards/frontier_coverage_10": -0.00022672154009342193, "rewards/frontier_coverage_15": -0.00022672154009342193, "rewards/frontier_coverage_20": -0.00022672154009342193, "rewards/frontier_coverage_25": -0.0004101816564798355, "rewards/frontier_coverage_5": -0.00022672154009342193, "rewards/frontier_ece_reward": 0.004574788874015212, "rewards/frontier_entropy_batch_reward": -0.24196033775806428, "signal/accuracy_reward/centered_abs_mean": 0.14015841782093047, "signal/accuracy_reward/group_bin_occupancy": 0.19618055555555558, "signal/accuracy_reward/group_std_mean": 0.19147344529628754, "signal/accuracy_reward/group_zero_std_frac": 0.4305555522441864, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07007920891046523, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07007920891046523, "signal/advantage_abs_mean": 0.0849688932299614, "signal/advantage_pre_scale_abs_mean": 0.0849688932299614, "signal/advantage_pre_scale_std": 0.1410199522972107, "signal/advantage_std": 0.1410199522972107, "signal/brier_reward/centered_abs_mean": 0.1360908180475235, "signal/brier_reward/group_bin_occupancy": 0.8277777777777777, "signal/brier_reward/group_std_mean": 0.1774505376815796, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013609081320464612, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013609081320464612, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027258848026394843, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8402777777777779, "signal/confidence_uniqueness_reward/group_std_mean": 0.0479625403881073, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027258848771452905, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027258848771452905, "signal/format_reward/centered_abs_mean": 0.01674804724752903, "signal/format_reward/group_bin_occupancy": 0.1451388888888889, "signal/format_reward/group_std_mean": 0.03558648675680161, "signal/format_reward/group_zero_std_frac": 0.8388888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008374023623764515, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008374023623764515, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012428847374394536, "signal/frontier_aurc_reward/group_bin_occupancy": 0.684375, "signal/frontier_aurc_reward/group_std_mean": 0.002172482665628195, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5536059800069778e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5536059800069778e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18805197179317473, "signal/frontier_coverage_0/group_bin_occupancy": 0.8288194444444444, "signal/frontier_coverage_0/group_std_mean": 0.2451252043247223, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023506497032940387, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023506497032940387, "signal/frontier_coverage_1/centered_abs_mean": 0.18805197179317473, "signal/frontier_coverage_1/group_bin_occupancy": 0.8288194444444444, "signal/frontier_coverage_1/group_std_mean": 0.2451252043247223, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023506497032940387, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023506497032940387, "signal/frontier_coverage_10/centered_abs_mean": 0.18805197179317473, "signal/frontier_coverage_10/group_bin_occupancy": 0.8288194444444444, "signal/frontier_coverage_10/group_std_mean": 0.2451252043247223, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023506497032940387, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023506497032940387, "signal/frontier_coverage_15/centered_abs_mean": 0.18805197179317473, "signal/frontier_coverage_15/group_bin_occupancy": 0.8288194444444444, "signal/frontier_coverage_15/group_std_mean": 0.2451252043247223, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023506497032940387, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023506497032940387, "signal/frontier_coverage_20/centered_abs_mean": 0.18805197179317473, "signal/frontier_coverage_20/group_bin_occupancy": 0.8288194444444444, "signal/frontier_coverage_20/group_std_mean": 0.2451252043247223, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023506497032940387, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023506497032940387, "signal/frontier_coverage_25/centered_abs_mean": 0.17898198664188386, "signal/frontier_coverage_25/group_bin_occupancy": 0.8277777777777778, "signal/frontier_coverage_25/group_std_mean": 0.23354826867580414, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002237274823710322, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002237274823710322, "signal/frontier_coverage_5/centered_abs_mean": 0.18805197179317473, "signal/frontier_coverage_5/group_bin_occupancy": 0.8288194444444444, "signal/frontier_coverage_5/group_std_mean": 0.2451252043247223, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023506497032940387, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023506497032940387, "signal/frontier_ece_reward/centered_abs_mean": 0.026790910586714744, "signal/frontier_ece_reward/group_bin_occupancy": 0.8034722222222224, "signal/frontier_ece_reward/group_std_mean": 0.03704798519611359, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0026790911331772806, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0026790911331772806, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3096132218837738, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7600694444444444, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3817123532295227, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03096132315695286, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03096132315695286, "step": 145 }, { "calibration/aurc": 0.17615962878323987, "calibration/batch_distribution_entropy": 0.9711179605405078, "calibration/batch_entropy_100bins": 0.9558541064018309, "calibration/batch_entropy_10bins": 0.9711179605405078, "calibration/batch_entropy_50bins": 0.9667547349742606, "calibration/batch_uniqueness": 0.9504408275880152, "calibration/buffer_distribution_entropy": 0.9648408747010532, "calibration/buffer_entropy_100bins": 0.9658663277206893, "calibration/buffer_entropy_10bins": 0.9648408747010532, "calibration/buffer_entropy_50bins": 0.9711379564362602, "calibration/confidence_entropy": 0.47955049166475805, "calibration/coverage@0%": 0.04777195843262925, "calibration/coverage@1%": 0.04777195843262925, "calibration/coverage@10%": 0.4098653974296026, "calibration/coverage@15%": 0.5542765430095182, "calibration/coverage@20%": 0.6567337578913923, "calibration/coverage@25%": 0.7034798104684413, "calibration/coverage@30%": 0.755544350661012, "calibration/coverage@5%": 0.2888922753120794, "calibration/ece": 0.18957569917445502, "calibration/mean_confidence": 0.5315577019563095, "calibration/prompt_uniqueness": 0.8578190975835357, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006076388888888884, "completions/max_length": 3641.4, "completions/max_terminated_length": 3641.4, "completions/mean_length": 812.189501953125, "completions/mean_terminated_length": 817.146875, "completions/min_length": 0.0, "completions/min_terminated_length": 246.8, "epoch": 0.3599955000562493, "grad_norm": 0.0004547924909275025, "learning_rate": 1.7469879518072292e-06, "loss": -0.004, "num_tokens": 332070474.0, "reward": 0.991722309589386, "reward_std": 0.12271568328142166, "rewards/accuracy_reward": 0.6927951455116272, "rewards/brier_reward": 0.8031431198120117, "rewards/confidence_uniqueness_reward": 0.9440382122993469, "rewards/format_reward": 0.9938367962837219, "rewards/frontier_aurc_reward": -0.0011841925443150103, "rewards/frontier_coverage_0": 0.011294396594166756, "rewards/frontier_coverage_1": 0.011294396594166756, "rewards/frontier_coverage_10": 0.011294396594166756, "rewards/frontier_coverage_15": 0.011294396594166756, "rewards/frontier_coverage_20": 0.012861622869968415, "rewards/frontier_coverage_25": 0.03274488709867, "rewards/frontier_coverage_5": 0.011294396594166756, "rewards/frontier_ece_reward": 0.004732385440729558, "rewards/frontier_entropy_batch_reward": -0.2804622292518616, "signal/accuracy_reward/centered_abs_mean": 0.16096462458372116, "signal/accuracy_reward/group_bin_occupancy": 0.20069444444444445, "signal/accuracy_reward/group_std_mean": 0.212801730632782, "signal/accuracy_reward/group_zero_std_frac": 0.3944444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08048231229186058, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08048231229186058, "signal/advantage_abs_mean": 0.09234711825847626, "signal/advantage_pre_scale_abs_mean": 0.09234711825847626, "signal/advantage_pre_scale_std": 0.1431581974029541, "signal/advantage_std": 0.1431581974029541, "signal/brier_reward/centered_abs_mean": 0.137125688791275, "signal/brier_reward/group_bin_occupancy": 0.8302083333333332, "signal/brier_reward/group_std_mean": 0.17737070620059966, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013712569139897823, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013712569139897823, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023196187615394593, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8798611111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.03824953958392143, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002319618733599782, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002319618733599782, "signal/format_reward/centered_abs_mean": 0.01061740443110466, "signal/format_reward/group_bin_occupancy": 0.13854166666666667, "signal/format_reward/group_std_mean": 0.023116332292556763, "signal/format_reward/group_zero_std_frac": 0.8916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00530870221555233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00530870221555233, "signal/frontier_aurc_reward/centered_abs_mean": 0.001641688891686499, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6972222222222222, "signal/frontier_aurc_reward/group_std_mean": 0.002830854058265686, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0521112674032338e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0521112674032338e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19024072587490082, "signal/frontier_coverage_0/group_bin_occupancy": 0.8246527777777779, "signal/frontier_coverage_0/group_std_mean": 0.2502481758594513, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023780090268701315, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023780090268701315, "signal/frontier_coverage_1/centered_abs_mean": 0.19024072587490082, "signal/frontier_coverage_1/group_bin_occupancy": 0.8246527777777779, "signal/frontier_coverage_1/group_std_mean": 0.2502481758594513, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023780090268701315, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023780090268701315, "signal/frontier_coverage_10/centered_abs_mean": 0.19024072587490082, "signal/frontier_coverage_10/group_bin_occupancy": 0.8246527777777779, "signal/frontier_coverage_10/group_std_mean": 0.2502481758594513, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023780090268701315, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023780090268701315, "signal/frontier_coverage_15/centered_abs_mean": 0.19024072587490082, "signal/frontier_coverage_15/group_bin_occupancy": 0.8246527777777779, "signal/frontier_coverage_15/group_std_mean": 0.2502481758594513, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023780090268701315, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023780090268701315, "signal/frontier_coverage_20/centered_abs_mean": 0.17767036259174346, "signal/frontier_coverage_20/group_bin_occupancy": 0.8194444444444444, "signal/frontier_coverage_20/group_std_mean": 0.23440926969051362, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022208794951438906, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022208794951438906, "signal/frontier_coverage_25/centered_abs_mean": 0.08696433901786804, "signal/frontier_coverage_25/group_bin_occupancy": 0.8604166666666668, "signal/frontier_coverage_25/group_std_mean": 0.11614209264516831, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010870542610064149, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010870542610064149, "signal/frontier_coverage_5/centered_abs_mean": 0.19024072587490082, "signal/frontier_coverage_5/group_bin_occupancy": 0.8246527777777779, "signal/frontier_coverage_5/group_std_mean": 0.2502481758594513, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023780090268701315, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023780090268701315, "signal/frontier_ece_reward/centered_abs_mean": 0.025189005583524705, "signal/frontier_ece_reward/group_bin_occupancy": 0.8534722222222223, "signal/frontier_ece_reward/group_std_mean": 0.033189672976732254, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002518900623545051, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002518900623545051, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33967989683151245, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7621527777777779, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4091792941093445, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033967990428209305, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033967990428209305, "step": 150 }, { "epoch": 0.3599955000562493, "eval_calibration/aurc": 0.12707819189206102, "eval_calibration/batch_distribution_entropy": 0.9112518272923418, "eval_calibration/batch_entropy_100bins": 0.7056907814916465, "eval_calibration/batch_entropy_10bins": 0.9112518272923418, "eval_calibration/batch_entropy_50bins": 0.7793820453071528, "eval_calibration/batch_uniqueness": 0.8955338195407947, "eval_calibration/buffer_distribution_entropy": 0.9699095211172789, "eval_calibration/buffer_entropy_100bins": 0.971712014931799, "eval_calibration/buffer_entropy_10bins": 0.9699095211172789, "eval_calibration/buffer_entropy_50bins": 0.9756457318826595, "eval_calibration/confidence_entropy": 0.4789857168138119, "eval_calibration/coverage@0%": 0.27655689964157704, "eval_calibration/coverage@1%": 0.27655689964157704, "eval_calibration/coverage@10%": 0.5968413978494623, "eval_calibration/coverage@15%": 0.6931339605734766, "eval_calibration/coverage@20%": 0.9000336021505376, "eval_calibration/coverage@25%": 0.9475806451612904, "eval_calibration/coverage@30%": 0.9791666666666666, "eval_calibration/coverage@5%": 0.31440412186379924, "eval_calibration/ece": 0.2527974328369596, "eval_calibration/mean_confidence": 0.5693722667953784, "eval_calibration/prompt_uniqueness": 0.8955338195407947, "eval_completions/clipped_ratio": 0.008680555555555544, "eval_completions/max_length": 2432.1666666666665, "eval_completions/max_terminated_length": 2432.1666666666665, "eval_completions/mean_length": 768.3798828125, "eval_completions/mean_terminated_length": 775.1339111328125, "eval_completions/min_length": 102.83333333333333, "eval_completions/min_terminated_length": 302.0, "eval_loss": 0.0, "eval_num_tokens": 332070474.0, "eval_reward": 0.9103851417700449, "eval_reward_std": 0.22593281418085098, "eval_rewards/accuracy_reward": 0.6857638955116272, "eval_rewards/brier_reward": 0.8014054795106252, "eval_rewards/confidence_uniqueness_reward": 0.886686364809672, "eval_rewards/format_reward": 0.9895833233992258, "eval_rewards/frontier_aurc_reward": -0.001305475743720308, "eval_rewards/frontier_coverage_0": 0.023123869051535923, "eval_rewards/frontier_coverage_1": 0.023123869051535923, "eval_rewards/frontier_coverage_10": 0.023123869051535923, "eval_rewards/frontier_coverage_15": 0.023123869051535923, "eval_rewards/frontier_coverage_20": 0.027419194191073377, "eval_rewards/frontier_coverage_25": 0.0541337039321661, "eval_rewards/frontier_coverage_5": 0.023123869051535923, "eval_rewards/frontier_ece_reward": 0.004123160368180834, "eval_rewards/frontier_entropy_batch_reward": -0.9895833233992258, "eval_runtime": 196.7962, "eval_samples_per_second": 5.081, "eval_signal/accuracy_reward/centered_abs_mean": 0.4184027810891469, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.46373791495958966, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20920139054457346, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20920139054457346, "eval_signal/advantage_abs_mean": 0.19404976814985275, "eval_signal/advantage_pre_scale_abs_mean": 0.19404976814985275, "eval_signal/advantage_pre_scale_std": 0.22494453191757202, "eval_signal/advantage_std": 0.22494453191757202, "eval_signal/brier_reward/centered_abs_mean": 0.18808596084515253, "eval_signal/brier_reward/group_bin_occupancy": 0.8472222222222223, "eval_signal/brier_reward/group_std_mean": 0.2454528883099556, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018808596457044285, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.018808596457044285, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.052401296173532806, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.40972222222222227, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08490791233877341, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005240129694963495, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005240129694963495, "eval_signal/format_reward/centered_abs_mean": 0.019965277674297493, "eval_signal/format_reward/group_bin_occupancy": 0.1597222222222222, "eval_signal/format_reward/group_std_mean": 0.05294674697021643, "eval_signal/format_reward/group_zero_std_frac": 0.722222238779068, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009982638837148746, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.009982638837148746, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0023627388873137534, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6041666666666666, "eval_signal/frontier_aurc_reward/group_std_mean": 0.005246327879528205, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.953423669775172e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.953423669775172e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.29363420108954114, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9166666666666666, "eval_signal/frontier_coverage_0/group_std_mean": 0.40715718269348145, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0036704275213802853, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036704275213802853, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.29363420108954114, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9166666666666666, "eval_signal/frontier_coverage_1/group_std_mean": 0.40715718269348145, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036704275213802853, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036704275213802853, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.29363420108954114, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9166666666666666, "eval_signal/frontier_coverage_10/group_std_mean": 0.40715718269348145, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036704275213802853, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036704275213802853, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.29363420108954114, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9166666666666666, "eval_signal/frontier_coverage_15/group_std_mean": 0.40715718269348145, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036704275213802853, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036704275213802853, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.21985628455877304, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9097222222222223, "eval_signal/frontier_coverage_20/group_std_mean": 0.3140091150999069, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027482035026575127, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027482035026575127, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.08635564024249713, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.920138888888889, "eval_signal/frontier_coverage_25/group_std_mean": 0.11180556441346805, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010794455301947892, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010794455301947892, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.29363420108954114, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9166666666666666, "eval_signal/frontier_coverage_5/group_std_mean": 0.40715718269348145, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036704275213802853, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036704275213802853, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.03141581453382969, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9791666666666666, "eval_signal/frontier_ece_reward/group_std_mean": 0.04069533385336399, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0031415815077101192, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0031415815077101192, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.019965277674297493, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.1597222222222222, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.05294674697021643, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.722222238779068, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0019965278139958778, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0019965278139958778, "eval_steps_per_second": 0.03, "step": 150 }, { "calibration/aurc": 0.14328247622139104, "calibration/batch_distribution_entropy": 0.968343329600269, "calibration/batch_entropy_100bins": 0.9604368265719134, "calibration/batch_entropy_10bins": 0.968343329600269, "calibration/batch_entropy_50bins": 0.9692432146982082, "calibration/batch_uniqueness": 0.9501986612953859, "calibration/buffer_distribution_entropy": 0.9723362683790409, "calibration/buffer_entropy_100bins": 0.9750311013963866, "calibration/buffer_entropy_10bins": 0.9723362683790409, "calibration/buffer_entropy_50bins": 0.9780727171466873, "calibration/confidence_entropy": 0.4911174549743745, "calibration/coverage@0%": 0.02847163633341644, "calibration/coverage@1%": 0.02847163633341644, "calibration/coverage@10%": 0.4854469320479792, "calibration/coverage@15%": 0.6681463087620155, "calibration/coverage@20%": 0.7713066843956897, "calibration/coverage@25%": 0.8516330092246323, "calibration/coverage@30%": 0.9198731266794095, "calibration/coverage@5%": 0.17589222970165377, "calibration/ece": 0.19967351461458113, "calibration/mean_confidence": 0.5618897389159396, "calibration/prompt_uniqueness": 0.8516510929037338, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008072916666666674, "completions/max_length": 3505.6, "completions/max_terminated_length": 3505.6, "completions/mean_length": 754.0788330078125, "completions/mean_terminated_length": 760.2467651367188, "completions/min_length": 0.0, "completions/min_terminated_length": 255.2, "epoch": 0.3719953500581243, "grad_norm": 0.0003978419699706137, "learning_rate": 1.5963855421686747e-06, "loss": -0.0069, "num_tokens": 343865174.0, "reward": 1.0113879680633544, "reward_std": 0.12805038392543794, "rewards/accuracy_reward": 0.7330729126930237, "rewards/brier_reward": 0.805462658405304, "rewards/confidence_uniqueness_reward": 0.942124092578888, "rewards/format_reward": 0.9918402671813965, "rewards/frontier_aurc_reward": -0.001076408103108406, "rewards/frontier_coverage_0": -0.007807806041091681, "rewards/frontier_coverage_1": -0.007807806041091681, "rewards/frontier_coverage_10": -0.007807806041091681, "rewards/frontier_coverage_15": -0.006517884694039822, "rewards/frontier_coverage_20": 0.012862606934504583, "rewards/frontier_coverage_25": 0.07125783488154411, "rewards/frontier_coverage_5": -0.007807806041091681, "rewards/frontier_ece_reward": 0.0006221902323886753, "rewards/frontier_entropy_batch_reward": -0.26455708146095275, "signal/accuracy_reward/centered_abs_mean": 0.16763780415058135, "signal/accuracy_reward/group_bin_occupancy": 0.1986111111111111, "signal/accuracy_reward/group_std_mean": 0.2156655490398407, "signal/accuracy_reward/group_zero_std_frac": 0.4111111044883728, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08381890207529068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08381890207529068, "signal/advantage_abs_mean": 0.09625413119792939, "signal/advantage_pre_scale_abs_mean": 0.09625413119792939, "signal/advantage_pre_scale_std": 0.15237030386924744, "signal/advantage_std": 0.15237030386924744, "signal/brier_reward/centered_abs_mean": 0.13339466452598572, "signal/brier_reward/group_bin_occupancy": 0.8326388888888889, "signal/brier_reward/group_std_mean": 0.17345697283744813, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013339466601610183, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013339466601610183, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026469842717051505, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8583333333333332, "signal/confidence_uniqueness_reward/group_std_mean": 0.04400735050439834, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026469843462109564, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026469843462109564, "signal/format_reward/centered_abs_mean": 0.014691840298473835, "signal/format_reward/group_bin_occupancy": 0.14097222222222222, "signal/format_reward/group_std_mean": 0.02975890673696995, "signal/format_reward/group_zero_std_frac": 0.8722222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007345920149236918, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007345920149236918, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016013333341106772, "signal/frontier_aurc_reward/group_bin_occupancy": 0.701388888888889, "signal/frontier_aurc_reward/group_std_mean": 0.0028420645277947186, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0016666530864315e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0016666530864315e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19111153185367585, "signal/frontier_coverage_0/group_bin_occupancy": 0.8305555555555555, "signal/frontier_coverage_0/group_std_mean": 0.24907057583332062, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023888942785561086, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023888942785561086, "signal/frontier_coverage_1/centered_abs_mean": 0.19111153185367585, "signal/frontier_coverage_1/group_bin_occupancy": 0.8305555555555555, "signal/frontier_coverage_1/group_std_mean": 0.24907057583332062, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023888942785561086, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023888942785561086, "signal/frontier_coverage_10/centered_abs_mean": 0.19111153185367585, "signal/frontier_coverage_10/group_bin_occupancy": 0.8305555555555555, "signal/frontier_coverage_10/group_std_mean": 0.24907057583332062, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023888942785561086, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023888942785561086, "signal/frontier_coverage_15/centered_abs_mean": 0.18777382373809814, "signal/frontier_coverage_15/group_bin_occupancy": 0.8302083333333334, "signal/frontier_coverage_15/group_std_mean": 0.2448781967163086, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023471728432923555, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023471728432923555, "signal/frontier_coverage_20/centered_abs_mean": 0.11022986769676209, "signal/frontier_coverage_20/group_bin_occupancy": 0.8333333333333334, "signal/frontier_coverage_20/group_std_mean": 0.14672330319881438, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013778733555227518, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013778733555227518, "signal/frontier_coverage_25/centered_abs_mean": 0.06623064428567886, "signal/frontier_coverage_25/group_bin_occupancy": 0.923611111111111, "signal/frontier_coverage_25/group_std_mean": 0.08523637503385544, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008278830791823566, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008278830791823566, "signal/frontier_coverage_5/centered_abs_mean": 0.19111153185367585, "signal/frontier_coverage_5/group_bin_occupancy": 0.8305555555555555, "signal/frontier_coverage_5/group_std_mean": 0.24907057583332062, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023888942785561086, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023888942785561086, "signal/frontier_ece_reward/centered_abs_mean": 0.022619354724884033, "signal/frontier_ece_reward/group_bin_occupancy": 0.892361111111111, "signal/frontier_ece_reward/group_std_mean": 0.029080601409077644, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022619356401264667, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022619356401264667, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31687353253364564, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7545138888888889, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3865100502967834, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031687355041503905, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031687355041503905, "step": 155 }, { "calibration/aurc": 0.12690070084218358, "calibration/batch_distribution_entropy": 0.9355695973242885, "calibration/batch_entropy_100bins": 0.9378717298573725, "calibration/batch_entropy_10bins": 0.9355695973242885, "calibration/batch_entropy_50bins": 0.9463927534050771, "calibration/batch_uniqueness": 0.9454156366716366, "calibration/buffer_distribution_entropy": 0.9767544955765025, "calibration/buffer_entropy_100bins": 0.9816366892198417, "calibration/buffer_entropy_10bins": 0.9767544955765025, "calibration/buffer_entropy_50bins": 0.9828938587347263, "calibration/confidence_entropy": 0.4945956173161183, "calibration/coverage@0%": 0.08913938989672962, "calibration/coverage@1%": 0.08913938989672962, "calibration/coverage@10%": 0.6179837830648094, "calibration/coverage@15%": 0.7267282037552255, "calibration/coverage@20%": 0.7877159740059231, "calibration/coverage@25%": 0.8556935730627254, "calibration/coverage@30%": 0.881283422459893, "calibration/coverage@5%": 0.4197157681091827, "calibration/ece": 0.15444755446701258, "calibration/mean_confidence": 0.6324834605056432, "calibration/prompt_uniqueness": 0.8498323599648391, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3734.0, "completions/max_terminated_length": 3734.0, "completions/mean_length": 752.8614624023437, "completions/mean_terminated_length": 761.7844116210938, "completions/min_length": 0.0, "completions/min_terminated_length": 233.2, "epoch": 0.38399520005999926, "grad_norm": 0.0003355911758262664, "learning_rate": 1.4457831325301204e-06, "loss": -0.0085, "num_tokens": 355625434.0, "reward": 0.9778003096580505, "reward_std": 0.1277013435959816, "rewards/accuracy_reward": 0.6736111164093017, "rewards/brier_reward": 0.7985804080963135, "rewards/confidence_uniqueness_reward": 0.9363638043403626, "rewards/format_reward": 0.9882812380790711, "rewards/frontier_aurc_reward": -0.0021719550946727394, "rewards/frontier_coverage_0": 0.020820068009197713, "rewards/frontier_coverage_1": 0.020820068009197713, "rewards/frontier_coverage_10": 0.020820068009197713, "rewards/frontier_coverage_15": 0.021759903896600007, "rewards/frontier_coverage_20": 0.029553866386413573, "rewards/frontier_coverage_25": 0.08654351085424423, "rewards/frontier_coverage_5": 0.020820068009197713, "rewards/frontier_ece_reward": 0.003279139272217435, "rewards/frontier_entropy_batch_reward": -0.2970531314611435, "signal/accuracy_reward/centered_abs_mean": 0.15097656100988388, "signal/accuracy_reward/group_bin_occupancy": 0.19722222222222222, "signal/accuracy_reward/group_std_mean": 0.20050234198570252, "signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07548828050494194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07548828050494194, "signal/advantage_abs_mean": 0.09499836862087249, "signal/advantage_pre_scale_abs_mean": 0.09499836862087249, "signal/advantage_pre_scale_std": 0.15216540694236755, "signal/advantage_std": 0.15216540694236755, "signal/brier_reward/centered_abs_mean": 0.1346014305949211, "signal/brier_reward/group_bin_occupancy": 0.845486111111111, "signal/brier_reward/group_std_mean": 0.17376158237457276, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013460143469274044, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013460143469274044, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03248362205922604, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.840625, "signal/confidence_uniqueness_reward/group_std_mean": 0.05201292261481285, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032483623828738926, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032483623828738926, "signal/format_reward/centered_abs_mean": 0.019677734375, "signal/format_reward/group_bin_occupancy": 0.14375, "signal/format_reward/group_std_mean": 0.03653144314885139, "signal/format_reward/group_zero_std_frac": 0.85, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0098388671875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0098388671875, "signal/frontier_aurc_reward/centered_abs_mean": 0.00255175766069442, "signal/frontier_aurc_reward/group_bin_occupancy": 0.69375, "signal/frontier_aurc_reward/group_std_mean": 0.004304410610347986, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1896971267997285e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1896971267997285e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17283792197704315, "signal/frontier_coverage_0/group_bin_occupancy": 0.8322916666666667, "signal/frontier_coverage_0/group_std_mean": 0.22631404995918275, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021604740992188453, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021604740992188453, "signal/frontier_coverage_1/centered_abs_mean": 0.17283792197704315, "signal/frontier_coverage_1/group_bin_occupancy": 0.8322916666666667, "signal/frontier_coverage_1/group_std_mean": 0.22631404995918275, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021604740992188453, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021604740992188453, "signal/frontier_coverage_10/centered_abs_mean": 0.17283792197704315, "signal/frontier_coverage_10/group_bin_occupancy": 0.8322916666666667, "signal/frontier_coverage_10/group_std_mean": 0.22631404995918275, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021604740992188453, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021604740992188453, "signal/frontier_coverage_15/centered_abs_mean": 0.1649569660425186, "signal/frontier_coverage_15/group_bin_occupancy": 0.8302083333333334, "signal/frontier_coverage_15/group_std_mean": 0.21631556153297424, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020619621267542244, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020619621267542244, "signal/frontier_coverage_20/centered_abs_mean": 0.0718404695391655, "signal/frontier_coverage_20/group_bin_occupancy": 0.8743055555555556, "signal/frontier_coverage_20/group_std_mean": 0.09588805437088013, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008980058599263429, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008980058599263429, "signal/frontier_coverage_25/centered_abs_mean": 0.07879409492015839, "signal/frontier_coverage_25/group_bin_occupancy": 0.9197916666666666, "signal/frontier_coverage_25/group_std_mean": 0.10093283802270889, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009849262423813343, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009849262423813343, "signal/frontier_coverage_5/centered_abs_mean": 0.17283792197704315, "signal/frontier_coverage_5/group_bin_occupancy": 0.8322916666666667, "signal/frontier_coverage_5/group_std_mean": 0.22631404995918275, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021604740992188453, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021604740992188453, "signal/frontier_ece_reward/centered_abs_mean": 0.02024664729833603, "signal/frontier_ece_reward/group_bin_occupancy": 0.8885416666666666, "signal/frontier_ece_reward/group_std_mean": 0.026085399091243744, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020246647531166674, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020246647531166674, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3272906422615051, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7503472222222223, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3956748187541962, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03272906616330147, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03272906616330147, "step": 160 }, { "calibration/aurc": 0.14685283005823302, "calibration/batch_distribution_entropy": 0.9624874588175226, "calibration/batch_entropy_100bins": 0.9493697551644169, "calibration/batch_entropy_10bins": 0.9624874588175226, "calibration/batch_entropy_50bins": 0.9618907631874976, "calibration/batch_uniqueness": 0.9484851363978098, "calibration/buffer_distribution_entropy": 0.9804740832245967, "calibration/buffer_entropy_100bins": 0.9871675813981946, "calibration/buffer_entropy_10bins": 0.9804740832245967, "calibration/buffer_entropy_50bins": 0.9869692646206021, "calibration/confidence_entropy": 0.48256626117106355, "calibration/coverage@0%": 0.16632372470645526, "calibration/coverage@1%": 0.2175999027169265, "calibration/coverage@10%": 0.4748341393546909, "calibration/coverage@15%": 0.6035655088332543, "calibration/coverage@20%": 0.6774566915283347, "calibration/coverage@25%": 0.7548037091669609, "calibration/coverage@30%": 0.8582674395252046, "calibration/coverage@5%": 0.34612221071049537, "calibration/ece": 0.1872612900282566, "calibration/mean_confidence": 0.5299400808356729, "calibration/prompt_uniqueness": 0.8486473671237833, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011545138888888907, "completions/max_length": 3253.6, "completions/max_terminated_length": 3253.6, "completions/mean_length": 786.2470581054688, "completions/mean_terminated_length": 795.4893676757813, "completions/min_length": 0.0, "completions/min_terminated_length": 260.2, "epoch": 0.39599505006187424, "grad_norm": 0.00034029711969196796, "learning_rate": 1.2951807228915664e-06, "loss": -0.0074, "num_tokens": 367822072.0, "reward": 0.9808308362960816, "reward_std": 0.12498695105314254, "rewards/accuracy_reward": 0.6665798664093018, "rewards/brier_reward": 0.8007077097892761, "rewards/confidence_uniqueness_reward": 0.9404749751091004, "rewards/format_reward": 0.9883680582046509, "rewards/frontier_aurc_reward": -0.0013396847061812878, "rewards/frontier_coverage_0": 0.030891514010727407, "rewards/frontier_coverage_1": 0.030891514010727407, "rewards/frontier_coverage_10": 0.030891514010727407, "rewards/frontier_coverage_15": 0.033891326561570165, "rewards/frontier_coverage_20": 0.04144119620323181, "rewards/frontier_coverage_25": 0.0987599179148674, "rewards/frontier_coverage_5": 0.030891514010727407, "rewards/frontier_ece_reward": 0.002779871807433665, "rewards/frontier_entropy_batch_reward": -0.2474340170621872, "signal/accuracy_reward/centered_abs_mean": 0.14655490815639496, "signal/accuracy_reward/group_bin_occupancy": 0.196875, "signal/accuracy_reward/group_std_mean": 0.19666456878185273, "signal/accuracy_reward/group_zero_std_frac": 0.425, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07327745407819748, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07327745407819748, "signal/advantage_abs_mean": 0.09191209226846694, "signal/advantage_pre_scale_abs_mean": 0.09191209226846694, "signal/advantage_pre_scale_std": 0.14856612384319307, "signal/advantage_std": 0.14856612384319307, "signal/brier_reward/centered_abs_mean": 0.13006339371204376, "signal/brier_reward/group_bin_occupancy": 0.8291666666666666, "signal/brier_reward/group_std_mean": 0.1694784790277481, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013006339780986309, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013006339780986309, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02956257574260235, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8458333333333334, "signal/confidence_uniqueness_reward/group_std_mean": 0.04808454513549805, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002956257527694106, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002956257527694106, "signal/format_reward/centered_abs_mean": 0.019140625, "signal/format_reward/group_bin_occupancy": 0.14270833333333333, "signal/format_reward/group_std_mean": 0.03554247245192528, "signal/format_reward/group_zero_std_frac": 0.8583333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0095703125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0095703125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015813488746061922, "signal/frontier_aurc_reward/group_bin_occupancy": 0.709375, "signal/frontier_aurc_reward/group_std_mean": 0.00276105348020792, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9766861441894434e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9766861441894434e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18960587084293365, "signal/frontier_coverage_0/group_bin_occupancy": 0.8322916666666668, "signal/frontier_coverage_0/group_std_mean": 0.2463166147470474, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002370073506608605, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002370073506608605, "signal/frontier_coverage_1/centered_abs_mean": 0.18960587084293365, "signal/frontier_coverage_1/group_bin_occupancy": 0.8322916666666668, "signal/frontier_coverage_1/group_std_mean": 0.2463166147470474, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002370073506608605, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002370073506608605, "signal/frontier_coverage_10/centered_abs_mean": 0.18960587084293365, "signal/frontier_coverage_10/group_bin_occupancy": 0.8322916666666668, "signal/frontier_coverage_10/group_std_mean": 0.2463166147470474, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002370073506608605, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002370073506608605, "signal/frontier_coverage_15/centered_abs_mean": 0.16226165294647216, "signal/frontier_coverage_15/group_bin_occupancy": 0.828125, "signal/frontier_coverage_15/group_std_mean": 0.21179051101207733, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020282708341255785, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020282708341255785, "signal/frontier_coverage_20/centered_abs_mean": 0.06612305119633674, "signal/frontier_coverage_20/group_bin_occupancy": 0.8989583333333332, "signal/frontier_coverage_20/group_std_mean": 0.08611558228731156, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008265381446108222, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008265381446108222, "signal/frontier_coverage_25/centered_abs_mean": 0.0762871414422989, "signal/frontier_coverage_25/group_bin_occupancy": 0.9041666666666666, "signal/frontier_coverage_25/group_std_mean": 0.0986581414937973, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009535892982967198, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009535892982967198, "signal/frontier_coverage_5/centered_abs_mean": 0.18960587084293365, "signal/frontier_coverage_5/group_bin_occupancy": 0.8322916666666668, "signal/frontier_coverage_5/group_std_mean": 0.2463166147470474, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002370073506608605, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002370073506608605, "signal/frontier_ece_reward/centered_abs_mean": 0.02045116536319256, "signal/frontier_ece_reward/group_bin_occupancy": 0.8788194444444445, "signal/frontier_ece_reward/group_std_mean": 0.026219840347766876, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020451165502890943, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020451165502890943, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3110631048679352, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7541666666666667, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38286496996879577, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031106310337781905, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031106310337781905, "step": 165 }, { "calibration/aurc": 0.11506329568369203, "calibration/batch_distribution_entropy": 0.9444845017520622, "calibration/batch_entropy_100bins": 0.9396727075432679, "calibration/batch_entropy_10bins": 0.9444845017520622, "calibration/batch_entropy_50bins": 0.9480220782226461, "calibration/batch_uniqueness": 0.9441397102617053, "calibration/buffer_distribution_entropy": 0.9836986498366203, "calibration/buffer_entropy_100bins": 0.9908642173134584, "calibration/buffer_entropy_10bins": 0.9836986498366203, "calibration/buffer_entropy_50bins": 0.9898192285924958, "calibration/confidence_entropy": 0.4891014253676884, "calibration/coverage@0%": 0.07110792272110364, "calibration/coverage@1%": 0.14246208938777033, "calibration/coverage@10%": 0.5021638829429665, "calibration/coverage@15%": 0.7248016285021122, "calibration/coverage@20%": 0.8123163068597996, "calibration/coverage@25%": 0.8979261867360814, "calibration/coverage@30%": 0.9481275715867081, "calibration/coverage@5%": 0.4241632481301785, "calibration/ece": 0.14160998775248346, "calibration/mean_confidence": 0.6190916753688434, "calibration/prompt_uniqueness": 0.8486469799052279, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0057291666666666515, "completions/max_length": 3744.8, "completions/max_terminated_length": 3744.8, "completions/mean_length": 763.334375, "completions/mean_terminated_length": 767.7540161132813, "completions/min_length": 0.0, "completions/min_terminated_length": 250.4, "epoch": 0.4079949000637492, "grad_norm": 0.00034656302887015045, "learning_rate": 1.1445783132530121e-06, "loss": -0.0039, "num_tokens": 379704868.0, "reward": 1.0055498480796814, "reward_std": 0.11865353286266327, "rewards/accuracy_reward": 0.7196180582046509, "rewards/brier_reward": 0.8130927443504333, "rewards/confidence_uniqueness_reward": 0.943022859096527, "rewards/format_reward": 0.9942708253860474, "rewards/frontier_aurc_reward": -0.001395798078738153, "rewards/frontier_coverage_0": 0.0038308378309011458, "rewards/frontier_coverage_1": 0.0038308378309011458, "rewards/frontier_coverage_10": 0.0038315469399094583, "rewards/frontier_coverage_15": 0.014347630552947521, "rewards/frontier_coverage_20": 0.04858548492193222, "rewards/frontier_coverage_25": 0.12771541029214858, "rewards/frontier_coverage_5": 0.0038308378309011458, "rewards/frontier_ece_reward": 0.00022077972535043954, "rewards/frontier_entropy_batch_reward": -0.29585447907447815, "signal/accuracy_reward/centered_abs_mean": 0.14562717080116272, "signal/accuracy_reward/group_bin_occupancy": 0.19722222222222222, "signal/accuracy_reward/group_std_mean": 0.19776785969734192, "signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07281358540058136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07281358540058136, "signal/advantage_abs_mean": 0.08716671168804169, "signal/advantage_pre_scale_abs_mean": 0.08716671168804169, "signal/advantage_pre_scale_std": 0.13849908411502837, "signal/advantage_std": 0.13849908411502837, "signal/brier_reward/centered_abs_mean": 0.12307202219963073, "signal/brier_reward/group_bin_occupancy": 0.8347222222222221, "signal/brier_reward/group_std_mean": 0.16019360721111298, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012307202816009522, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012307202816009522, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023052535578608514, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8871527777777777, "signal/confidence_uniqueness_reward/group_std_mean": 0.0379045195877552, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023052536882460116, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023052536882460116, "signal/format_reward/centered_abs_mean": 0.010394965391606092, "signal/format_reward/group_bin_occupancy": 0.13784722222222223, "signal/format_reward/group_std_mean": 0.022536759078502656, "signal/format_reward/group_zero_std_frac": 0.8972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005197482695803046, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005197482695803046, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017953221686184406, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7086805555555555, "signal/frontier_aurc_reward/group_std_mean": 0.0030906103551387788, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.244152765342733e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.244152765342733e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17162533700466157, "signal/frontier_coverage_0/group_bin_occupancy": 0.829861111111111, "signal/frontier_coverage_0/group_std_mean": 0.22700339257717134, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00214531677775085, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00214531677775085, "signal/frontier_coverage_1/centered_abs_mean": 0.17162533700466157, "signal/frontier_coverage_1/group_bin_occupancy": 0.829861111111111, "signal/frontier_coverage_1/group_std_mean": 0.22700339257717134, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00214531677775085, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00214531677775085, "signal/frontier_coverage_10/centered_abs_mean": 0.17162414491176606, "signal/frontier_coverage_10/group_bin_occupancy": 0.829861111111111, "signal/frontier_coverage_10/group_std_mean": 0.2270018845796585, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002145301876589656, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002145301876589656, "signal/frontier_coverage_15/centered_abs_mean": 0.13137867748737336, "signal/frontier_coverage_15/group_bin_occupancy": 0.828125, "signal/frontier_coverage_15/group_std_mean": 0.1753629505634308, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00164223350584507, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00164223350584507, "signal/frontier_coverage_20/centered_abs_mean": 0.058792735636234286, "signal/frontier_coverage_20/group_bin_occupancy": 0.9239583333333332, "signal/frontier_coverage_20/group_std_mean": 0.07586074471473694, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007349092396907508, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007349092396907508, "signal/frontier_coverage_25/centered_abs_mean": 0.08862319886684418, "signal/frontier_coverage_25/group_bin_occupancy": 0.9076388888888889, "signal/frontier_coverage_25/group_std_mean": 0.11482690125703812, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011077900417149067, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011077900417149067, "signal/frontier_coverage_5/centered_abs_mean": 0.17162533700466157, "signal/frontier_coverage_5/group_bin_occupancy": 0.829861111111111, "signal/frontier_coverage_5/group_std_mean": 0.22700339257717134, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00214531677775085, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00214531677775085, "signal/frontier_ece_reward/centered_abs_mean": 0.019284069538116455, "signal/frontier_ece_reward/group_bin_occupancy": 0.8850694444444445, "signal/frontier_ece_reward/group_std_mean": 0.024903832748532296, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019284070702269673, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019284070702269673, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3286241352558136, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7600694444444444, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39900038838386537, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03286241367459297, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03286241367459297, "step": 170 }, { "calibration/aurc": 0.1167342775029246, "calibration/batch_distribution_entropy": 0.9745598037903308, "calibration/batch_entropy_100bins": 0.9560613502038893, "calibration/batch_entropy_10bins": 0.9745598037903308, "calibration/batch_entropy_50bins": 0.9679135925927282, "calibration/batch_uniqueness": 0.9500906165718821, "calibration/buffer_distribution_entropy": 0.9859050713779315, "calibration/buffer_entropy_100bins": 0.9924500141789924, "calibration/buffer_entropy_10bins": 0.9859050713779315, "calibration/buffer_entropy_50bins": 0.9912866063537695, "calibration/confidence_entropy": 0.4973909231670772, "calibration/coverage@0%": 0.0202633365766245, "calibration/coverage@1%": 0.0202633365766245, "calibration/coverage@10%": 0.5584808730819832, "calibration/coverage@15%": 0.7098551422535211, "calibration/coverage@20%": 0.8395232713826571, "calibration/coverage@25%": 0.9306284277563652, "calibration/coverage@30%": 0.9889786840178486, "calibration/coverage@5%": 0.2781329842712045, "calibration/ece": 0.19580150982554967, "calibration/mean_confidence": 0.5390505752013575, "calibration/prompt_uniqueness": 0.8520957156663304, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010677083333333347, "completions/max_length": 3945.2, "completions/max_terminated_length": 3945.2, "completions/mean_length": 797.4620727539062, "completions/mean_terminated_length": 806.0635620117188, "completions/min_length": 0.0, "completions/min_terminated_length": 246.0, "epoch": 0.4199947500656242, "grad_norm": 0.0003526929940562695, "learning_rate": 9.93975903614458e-07, "loss": -0.0088, "num_tokens": 391999599.0, "reward": 1.001812708377838, "reward_std": 0.1259681537747383, "rewards/accuracy_reward": 0.7131076335906983, "rewards/brier_reward": 0.8003608345985412, "rewards/confidence_uniqueness_reward": 0.9405298233032227, "rewards/format_reward": 0.9892361164093018, "rewards/frontier_aurc_reward": -0.0010344096925109624, "rewards/frontier_coverage_0": -0.00219659386202693, "rewards/frontier_coverage_1": -0.00219659386202693, "rewards/frontier_coverage_10": -0.0021957614459097385, "rewards/frontier_coverage_15": 0.010962388198822736, "rewards/frontier_coverage_20": 0.055274682492017745, "rewards/frontier_coverage_25": 0.13252132833004, "rewards/frontier_coverage_5": -0.00219659386202693, "rewards/frontier_ece_reward": -0.0011111346306279303, "rewards/frontier_entropy_batch_reward": -0.2569884657859802, "signal/accuracy_reward/centered_abs_mean": 0.15221896767616272, "signal/accuracy_reward/group_bin_occupancy": 0.20381944444444447, "signal/accuracy_reward/group_std_mean": 0.20974666476249695, "signal/accuracy_reward/group_zero_std_frac": 0.3694444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07610948383808136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07610948383808136, "signal/advantage_abs_mean": 0.09184942096471786, "signal/advantage_pre_scale_abs_mean": 0.09184942096471786, "signal/advantage_pre_scale_std": 0.14980422854423522, "signal/advantage_std": 0.14980422854423522, "signal/brier_reward/centered_abs_mean": 0.13161776959896088, "signal/brier_reward/group_bin_occupancy": 0.8381944444444442, "signal/brier_reward/group_std_mean": 0.17007612586021423, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013161776773631572, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013161776773631572, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02938559278845787, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8579861111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.04647618532180786, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002938559278845787, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002938559278845787, "signal/format_reward/centered_abs_mean": 0.018250868283212186, "signal/format_reward/group_bin_occupancy": 0.14131944444444447, "signal/format_reward/group_std_mean": 0.032946827635169026, "signal/format_reward/group_zero_std_frac": 0.8694444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009125434141606093, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009125434141606093, "signal/frontier_aurc_reward/centered_abs_mean": 0.00137441111728549, "signal/frontier_aurc_reward/group_bin_occupancy": 0.70625, "signal/frontier_aurc_reward/group_std_mean": 0.0025132787879556416, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7180139366246294e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7180139366246294e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19130564630031585, "signal/frontier_coverage_0/group_bin_occupancy": 0.825, "signal/frontier_coverage_0/group_std_mean": 0.2510385990142822, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023913206066936256, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023913206066936256, "signal/frontier_coverage_1/centered_abs_mean": 0.19130564630031585, "signal/frontier_coverage_1/group_bin_occupancy": 0.825, "signal/frontier_coverage_1/group_std_mean": 0.2510385990142822, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023913206066936256, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023913206066936256, "signal/frontier_coverage_10/centered_abs_mean": 0.19130274057388305, "signal/frontier_coverage_10/group_bin_occupancy": 0.825, "signal/frontier_coverage_10/group_std_mean": 0.25103478133678436, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002391284331679344, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002391284331679344, "signal/frontier_coverage_15/centered_abs_mean": 0.13286823630332947, "signal/frontier_coverage_15/group_bin_occupancy": 0.8291666666666666, "signal/frontier_coverage_15/group_std_mean": 0.1757916271686554, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016608530189841987, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016608530189841987, "signal/frontier_coverage_20/centered_abs_mean": 0.06132784262299538, "signal/frontier_coverage_20/group_bin_occupancy": 0.91875, "signal/frontier_coverage_20/group_std_mean": 0.07887878715991974, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007665980607271195, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007665980607271195, "signal/frontier_coverage_25/centered_abs_mean": 0.08898466527462005, "signal/frontier_coverage_25/group_bin_occupancy": 0.9013888888888889, "signal/frontier_coverage_25/group_std_mean": 0.11519535034894943, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011123083299025894, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011123083299025894, "signal/frontier_coverage_5/centered_abs_mean": 0.19130564630031585, "signal/frontier_coverage_5/group_bin_occupancy": 0.825, "signal/frontier_coverage_5/group_std_mean": 0.2510385990142822, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023913206066936256, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023913206066936256, "signal/frontier_ece_reward/centered_abs_mean": 0.020530903711915016, "signal/frontier_ece_reward/group_bin_occupancy": 0.8666666666666668, "signal/frontier_ece_reward/group_std_mean": 0.026369891688227655, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002053090324625373, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002053090324625373, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3158855140209198, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7579861111111111, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3868151426315308, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03158855028450489, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03158855028450489, "step": 175 }, { "calibration/aurc": 0.0817330623111906, "calibration/batch_distribution_entropy": 0.9711504748208586, "calibration/batch_entropy_100bins": 0.9562899168107373, "calibration/batch_entropy_10bins": 0.9711504748208586, "calibration/batch_entropy_50bins": 0.9662422602252176, "calibration/batch_uniqueness": 0.9499827534399664, "calibration/buffer_distribution_entropy": 0.9860851734186549, "calibration/buffer_entropy_100bins": 0.9926045511496687, "calibration/buffer_entropy_10bins": 0.9860851734186549, "calibration/buffer_entropy_50bins": 0.9914161779811446, "calibration/confidence_entropy": 0.48496192333899246, "calibration/coverage@0%": 0.1004148895158747, "calibration/coverage@1%": 0.19970107002610285, "calibration/coverage@10%": 0.6926593040155616, "calibration/coverage@15%": 0.8085424051687502, "calibration/coverage@20%": 0.8868322054716925, "calibration/coverage@25%": 0.9545527002885805, "calibration/coverage@30%": 0.970757180156658, "calibration/coverage@5%": 0.5066726077108479, "calibration/ece": 0.178456279434725, "calibration/mean_confidence": 0.5863382511632422, "calibration/prompt_uniqueness": 0.8510821885227383, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666652, "completions/max_length": 3804.2, "completions/max_terminated_length": 3804.2, "completions/mean_length": 778.0964477539062, "completions/mean_terminated_length": 786.2614990234375, "completions/min_length": 0.0, "completions/min_terminated_length": 262.0, "epoch": 0.4319946000674992, "grad_norm": 0.0003696930652949959, "learning_rate": 8.433734939759036e-07, "loss": -0.0084, "num_tokens": 404063238.0, "reward": 0.9986873149871827, "reward_std": 0.12596749514341354, "rewards/accuracy_reward": 0.7086805582046509, "rewards/brier_reward": 0.8022766947746277, "rewards/confidence_uniqueness_reward": 0.9392195105552673, "rewards/format_reward": 0.9894965291023254, "rewards/frontier_aurc_reward": -0.0018584353383630514, "rewards/frontier_coverage_0": 0.00486559234559536, "rewards/frontier_coverage_1": 0.00486559234559536, "rewards/frontier_coverage_10": 0.0048669856041669846, "rewards/frontier_coverage_15": 0.018902628193609418, "rewards/frontier_coverage_20": 0.06483815237879753, "rewards/frontier_coverage_25": 0.14728063642978667, "rewards/frontier_coverage_5": 0.00486559234559536, "rewards/frontier_ece_reward": -0.0008010620949789882, "rewards/frontier_entropy_batch_reward": -0.2757860660552979, "signal/accuracy_reward/centered_abs_mean": 0.151953125, "signal/accuracy_reward/group_bin_occupancy": 0.19305555555555556, "signal/accuracy_reward/group_std_mean": 0.19671571254730225, "signal/accuracy_reward/group_zero_std_frac": 0.4555555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0759765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0759765625, "signal/advantage_abs_mean": 0.09426534920930862, "signal/advantage_pre_scale_abs_mean": 0.09426534920930862, "signal/advantage_pre_scale_std": 0.1520604223012924, "signal/advantage_std": 0.1520604223012924, "signal/brier_reward/centered_abs_mean": 0.13269921243190766, "signal/brier_reward/group_bin_occupancy": 0.8253472222222221, "signal/brier_reward/group_std_mean": 0.1710539847612381, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013269922323524952, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013269922323524952, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02954910360276699, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8520833333333334, "signal/confidence_uniqueness_reward/group_std_mean": 0.04796513915061951, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029549104161560535, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029549104161560535, "signal/format_reward/centered_abs_mean": 0.01773546002805233, "signal/format_reward/group_bin_occupancy": 0.14270833333333333, "signal/format_reward/group_std_mean": 0.0337453979998827, "signal/format_reward/group_zero_std_frac": 0.8583333373069764, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008867730014026165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008867730014026165, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022768301889300345, "signal/frontier_aurc_reward/group_bin_occupancy": 0.68125, "signal/frontier_aurc_reward/group_std_mean": 0.0042192541994154455, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8460378598538227e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8460378598538227e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18299897611141205, "signal/frontier_coverage_0/group_bin_occupancy": 0.8215277777777779, "signal/frontier_coverage_0/group_std_mean": 0.23625112175941468, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022874871734529733, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022874871734529733, "signal/frontier_coverage_1/centered_abs_mean": 0.18299897611141205, "signal/frontier_coverage_1/group_bin_occupancy": 0.8215277777777779, "signal/frontier_coverage_1/group_std_mean": 0.23625112175941468, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022874871734529733, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022874871734529733, "signal/frontier_coverage_10/centered_abs_mean": 0.18299424648284912, "signal/frontier_coverage_10/group_bin_occupancy": 0.8215277777777779, "signal/frontier_coverage_10/group_std_mean": 0.23624541461467743, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002287428034469485, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002287428034469485, "signal/frontier_coverage_15/centered_abs_mean": 0.10037829428911209, "signal/frontier_coverage_15/group_bin_occupancy": 0.8378472222222223, "signal/frontier_coverage_15/group_std_mean": 0.13245663940906524, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012547286925837398, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012547286925837398, "signal/frontier_coverage_20/centered_abs_mean": 0.06611849516630172, "signal/frontier_coverage_20/group_bin_occupancy": 0.9319444444444445, "signal/frontier_coverage_20/group_std_mean": 0.0842194378376007, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008264812408015132, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008264812408015132, "signal/frontier_coverage_25/centered_abs_mean": 0.10266990959644318, "signal/frontier_coverage_25/group_bin_occupancy": 0.8899305555555557, "signal/frontier_coverage_25/group_std_mean": 0.1339954525232315, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012833738466724754, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012833738466724754, "signal/frontier_coverage_5/centered_abs_mean": 0.18299897611141205, "signal/frontier_coverage_5/group_bin_occupancy": 0.8215277777777779, "signal/frontier_coverage_5/group_std_mean": 0.23625112175941468, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022874871734529733, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022874871734529733, "signal/frontier_ece_reward/centered_abs_mean": 0.019692152738571167, "signal/frontier_ece_reward/group_bin_occupancy": 0.8805555555555555, "signal/frontier_ece_reward/group_std_mean": 0.024879150092601776, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001969215413555503, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001969215413555503, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3193018019199371, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7447916666666667, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38962661623954775, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031930181011557576, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031930181011557576, "step": 180 }, { "calibration/aurc": 0.16412042486004724, "calibration/batch_distribution_entropy": 0.9638283593070748, "calibration/batch_entropy_100bins": 0.952921553941341, "calibration/batch_entropy_10bins": 0.9638283593070748, "calibration/batch_entropy_50bins": 0.961955903293342, "calibration/batch_uniqueness": 0.9487742638138078, "calibration/buffer_distribution_entropy": 0.9855597957864515, "calibration/buffer_entropy_100bins": 0.9923941439583099, "calibration/buffer_entropy_10bins": 0.9855597957864515, "calibration/buffer_entropy_50bins": 0.9911491227931158, "calibration/confidence_entropy": 0.49912697432262654, "calibration/coverage@0%": 0.015748031496062992, "calibration/coverage@1%": 0.015748031496062992, "calibration/coverage@10%": 0.23657545417788833, "calibration/coverage@15%": 0.7106708432938508, "calibration/coverage@20%": 0.8636267072360081, "calibration/coverage@25%": 0.9296587926509187, "calibration/coverage@30%": 0.9469816272965879, "calibration/coverage@5%": 0.09396325459317587, "calibration/ece": 0.21072273147614853, "calibration/mean_confidence": 0.5744268236371306, "calibration/prompt_uniqueness": 0.8518122594254635, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011631944444444464, "completions/max_length": 3182.0, "completions/max_terminated_length": 3182.0, "completions/mean_length": 764.2948852539063, "completions/mean_terminated_length": 773.3405883789062, "completions/min_length": 0.0, "completions/min_terminated_length": 213.8, "epoch": 0.44399445006937416, "grad_norm": 0.00039282196667045355, "learning_rate": 6.927710843373495e-07, "loss": -0.0094, "num_tokens": 415957963.0, "reward": 0.9903581857681274, "reward_std": 0.13288189321756363, "rewards/accuracy_reward": 0.6887152671813965, "rewards/brier_reward": 0.8001930832862854, "rewards/confidence_uniqueness_reward": 0.9394657015800476, "rewards/format_reward": 0.9883680582046509, "rewards/frontier_aurc_reward": -0.0016249807551503182, "rewards/frontier_coverage_0": 0.014122280664741993, "rewards/frontier_coverage_1": 0.014122280664741993, "rewards/frontier_coverage_10": 0.014189984847325832, "rewards/frontier_coverage_15": 0.030132049694657326, "rewards/frontier_coverage_20": 0.0688400574028492, "rewards/frontier_coverage_25": 0.14342034608125687, "rewards/frontier_coverage_5": 0.014122280664741993, "rewards/frontier_ece_reward": -0.0014869593200273813, "rewards/frontier_entropy_batch_reward": -0.25717237293720246, "signal/accuracy_reward/centered_abs_mean": 0.1658420145511627, "signal/accuracy_reward/group_bin_occupancy": 0.19965277777777776, "signal/accuracy_reward/group_std_mean": 0.21452577412128448, "signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08292100727558135, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08292100727558135, "signal/advantage_abs_mean": 0.09931548237800598, "signal/advantage_pre_scale_abs_mean": 0.09931548237800598, "signal/advantage_pre_scale_std": 0.1559781938791275, "signal/advantage_std": 0.1559781938791275, "signal/brier_reward/centered_abs_mean": 0.13490980863571167, "signal/brier_reward/group_bin_occupancy": 0.8333333333333334, "signal/brier_reward/group_std_mean": 0.1740681231021881, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013490980863571167, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013490980863571167, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0304035734385252, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.840625, "signal/confidence_uniqueness_reward/group_std_mean": 0.05106213316321373, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030403575394302605, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030403575394302605, "signal/format_reward/centered_abs_mean": 0.01956380195915699, "signal/format_reward/group_bin_occupancy": 0.14513888888888887, "signal/format_reward/group_std_mean": 0.03807148076593876, "signal/format_reward/group_zero_std_frac": 0.8388888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009781900979578494, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009781900979578494, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019493137020617723, "signal/frontier_aurc_reward/group_bin_occupancy": 0.679513888888889, "signal/frontier_aurc_reward/group_std_mean": 0.0034036038909107448, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.436642062093597e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.436642062093597e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1955573081970215, "signal/frontier_coverage_0/group_bin_occupancy": 0.83125, "signal/frontier_coverage_0/group_std_mean": 0.2514010012149811, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024444664362818004, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024444664362818004, "signal/frontier_coverage_1/centered_abs_mean": 0.1955573081970215, "signal/frontier_coverage_1/group_bin_occupancy": 0.83125, "signal/frontier_coverage_1/group_std_mean": 0.2514010012149811, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024444664362818004, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024444664362818004, "signal/frontier_coverage_10/centered_abs_mean": 0.19504148066043853, "signal/frontier_coverage_10/group_bin_occupancy": 0.83125, "signal/frontier_coverage_10/group_std_mean": 0.2507701963186264, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002438018564134836, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002438018564134836, "signal/frontier_coverage_15/centered_abs_mean": 0.08795134276151657, "signal/frontier_coverage_15/group_bin_occupancy": 0.8697916666666667, "signal/frontier_coverage_15/group_std_mean": 0.11580315828323365, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010993918171152473, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010993918171152473, "signal/frontier_coverage_20/centered_abs_mean": 0.06347033008933067, "signal/frontier_coverage_20/group_bin_occupancy": 0.9170138888888889, "signal/frontier_coverage_20/group_std_mean": 0.08160731345415115, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007933791261166334, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007933791261166334, "signal/frontier_coverage_25/centered_abs_mean": 0.10018244087696075, "signal/frontier_coverage_25/group_bin_occupancy": 0.88125, "signal/frontier_coverage_25/group_std_mean": 0.13163245618343353, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012522805249318481, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012522805249318481, "signal/frontier_coverage_5/centered_abs_mean": 0.1955573081970215, "signal/frontier_coverage_5/group_bin_occupancy": 0.83125, "signal/frontier_coverage_5/group_std_mean": 0.2514010012149811, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024444664362818004, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024444664362818004, "signal/frontier_ece_reward/centered_abs_mean": 0.020365006104111672, "signal/frontier_ece_reward/group_bin_occupancy": 0.8649305555555555, "signal/frontier_ece_reward/group_std_mean": 0.025510191544890405, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00203650058247149, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00203650058247149, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3127790868282318, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7649305555555554, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38549832701683046, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03127790912985802, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03127790912985802, "step": 185 }, { "calibration/aurc": 0.1639593065228973, "calibration/batch_distribution_entropy": 0.9539581142043598, "calibration/batch_entropy_100bins": 0.9483003354272391, "calibration/batch_entropy_10bins": 0.9539581142043598, "calibration/batch_entropy_50bins": 0.957835409824707, "calibration/batch_uniqueness": 0.9477393587487668, "calibration/buffer_distribution_entropy": 0.9859487971640286, "calibration/buffer_entropy_100bins": 0.9926121558733081, "calibration/buffer_entropy_10bins": 0.9859487971640286, "calibration/buffer_entropy_50bins": 0.9914107968063413, "calibration/confidence_entropy": 0.4999114909521826, "calibration/coverage@0%": 0.024051083448119896, "calibration/coverage@1%": 0.024051083448119896, "calibration/coverage@10%": 0.33487271540469976, "calibration/coverage@15%": 0.5131284671539099, "calibration/coverage@20%": 0.7305083738512776, "calibration/coverage@25%": 0.9254270017406441, "calibration/coverage@30%": 0.9780678851174935, "calibration/coverage@5%": 0.17004451834186524, "calibration/ece": 0.20221066968086862, "calibration/mean_confidence": 0.5911521434855616, "calibration/prompt_uniqueness": 0.8555883534825799, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004687499999999978, "completions/max_length": 3312.2, "completions/max_terminated_length": 3312.2, "completions/mean_length": 765.2302124023438, "completions/mean_terminated_length": 768.8353393554687, "completions/min_length": 0.0, "completions/min_terminated_length": 241.0, "epoch": 0.45599430007124914, "grad_norm": 0.0003932048275601119, "learning_rate": 5.421686746987952e-07, "loss": -0.003, "num_tokens": 427856359.0, "reward": 1.0095869898796082, "reward_std": 0.12467661201953888, "rewards/accuracy_reward": 0.7232638955116272, "rewards/brier_reward": 0.8051016926765442, "rewards/confidence_uniqueness_reward": 0.9449184775352478, "rewards/format_reward": 0.9951388835906982, "rewards/frontier_aurc_reward": -0.0015021357918158173, "rewards/frontier_coverage_0": -0.003796599945053458, "rewards/frontier_coverage_1": -0.003796599945053458, "rewards/frontier_coverage_10": -0.0035698655527085068, "rewards/frontier_coverage_15": 0.02750418670475483, "rewards/frontier_coverage_20": 0.07788380682468414, "rewards/frontier_coverage_25": 0.15804124176502227, "rewards/frontier_coverage_5": -0.003777299216017127, "rewards/frontier_ece_reward": -0.004084828868508339, "rewards/frontier_entropy_batch_reward": -0.27295289635658265, "signal/accuracy_reward/centered_abs_mean": 0.1623914957046509, "signal/accuracy_reward/group_bin_occupancy": 0.20243055555555553, "signal/accuracy_reward/group_std_mean": 0.21437447667121887, "signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08119574785232545, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08119574785232545, "signal/advantage_abs_mean": 0.09301377832889557, "signal/advantage_pre_scale_abs_mean": 0.09301377832889557, "signal/advantage_pre_scale_std": 0.14447612464427947, "signal/advantage_std": 0.14447612464427947, "signal/brier_reward/centered_abs_mean": 0.1285407453775406, "signal/brier_reward/group_bin_occupancy": 0.8503472222222221, "signal/brier_reward/group_std_mean": 0.16547386050224305, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012854074873030186, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012854074873030186, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021638569980859758, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8777777777777779, "signal/confidence_uniqueness_reward/group_std_mean": 0.0373595766723156, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002163857058621943, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002163857058621943, "signal/format_reward/centered_abs_mean": 0.00916883684694767, "signal/format_reward/group_bin_occupancy": 0.1388888888888889, "signal/format_reward/group_std_mean": 0.022394910082221033, "signal/format_reward/group_zero_std_frac": 0.8888888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004584418423473835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004584418423473835, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017511979909613728, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6802083333333333, "signal/frontier_aurc_reward/group_std_mean": 0.0030707920901477336, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.18899756873725e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.18899756873725e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19192939400672912, "signal/frontier_coverage_0/group_bin_occupancy": 0.83125, "signal/frontier_coverage_0/group_std_mean": 0.24938672184944152, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023991174064576628, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023991174064576628, "signal/frontier_coverage_1/centered_abs_mean": 0.19192939400672912, "signal/frontier_coverage_1/group_bin_occupancy": 0.83125, "signal/frontier_coverage_1/group_std_mean": 0.24938672184944152, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023991174064576628, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023991174064576628, "signal/frontier_coverage_10/centered_abs_mean": 0.19131710529327392, "signal/frontier_coverage_10/group_bin_occupancy": 0.8326388888888889, "signal/frontier_coverage_10/group_std_mean": 0.24862921237945557, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023914637975394728, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023914637975394728, "signal/frontier_coverage_15/centered_abs_mean": 0.07733116149902344, "signal/frontier_coverage_15/group_bin_occupancy": 0.8697916666666666, "signal/frontier_coverage_15/group_std_mean": 0.10203811377286912, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009666395024396479, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009666395024396479, "signal/frontier_coverage_20/centered_abs_mean": 0.06752799674868584, "signal/frontier_coverage_20/group_bin_occupancy": 0.9149305555555556, "signal/frontier_coverage_20/group_std_mean": 0.0865270435810089, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008440999779850244, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008440999779850244, "signal/frontier_coverage_25/centered_abs_mean": 0.1067051038146019, "signal/frontier_coverage_25/group_bin_occupancy": 0.884375, "signal/frontier_coverage_25/group_std_mean": 0.13877106308937073, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013338138349354267, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013338138349354267, "signal/frontier_coverage_5/centered_abs_mean": 0.1918783277273178, "signal/frontier_coverage_5/group_bin_occupancy": 0.83125, "signal/frontier_coverage_5/group_std_mean": 0.24932389259338378, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00239847912453115, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00239847912453115, "signal/frontier_ece_reward/centered_abs_mean": 0.020100595057010652, "signal/frontier_ece_reward/group_bin_occupancy": 0.8600694444444444, "signal/frontier_ece_reward/group_std_mean": 0.02535393163561821, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002010059542953968, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002010059542953968, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32644957304000854, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7652777777777778, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39766885042190553, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0326449565589428, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0326449565589428, "step": 190 }, { "calibration/aurc": 0.1706777636504669, "calibration/batch_distribution_entropy": 0.9790970951079817, "calibration/batch_entropy_100bins": 0.9607017265781762, "calibration/batch_entropy_10bins": 0.9790970951079817, "calibration/batch_entropy_50bins": 0.9733776016996145, "calibration/batch_uniqueness": 0.9518248437650525, "calibration/buffer_distribution_entropy": 0.985472118915245, "calibration/buffer_entropy_100bins": 0.9923770458774129, "calibration/buffer_entropy_10bins": 0.985472118915245, "calibration/buffer_entropy_50bins": 0.9911377661542394, "calibration/confidence_entropy": 0.49873662902140675, "calibration/coverage@0%": 0.033667852911883595, "calibration/coverage@1%": 0.033667852911883595, "calibration/coverage@10%": 0.3685121918209364, "calibration/coverage@15%": 0.4559732123356472, "calibration/coverage@20%": 0.5829903013320054, "calibration/coverage@25%": 0.7578634163247334, "calibration/coverage@30%": 0.9112590887234259, "calibration/coverage@5%": 0.27687617078576193, "calibration/ece": 0.18384669897594583, "calibration/mean_confidence": 0.551976400559588, "calibration/prompt_uniqueness": 0.848788807132955, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010243055555555557, "completions/max_length": 3561.0, "completions/max_terminated_length": 3561.0, "completions/mean_length": 777.999560546875, "completions/mean_terminated_length": 786.172314453125, "completions/min_length": 0.0, "completions/min_terminated_length": 231.8, "epoch": 0.46799415007312406, "grad_norm": 0.00040629622526466846, "learning_rate": 3.91566265060241e-07, "loss": -0.0077, "num_tokens": 439899778.0, "reward": 0.986942994594574, "reward_std": 0.1280568614602089, "rewards/accuracy_reward": 0.6803819537162781, "rewards/brier_reward": 0.7949827075004577, "rewards/confidence_uniqueness_reward": 0.9411714434623718, "rewards/format_reward": 0.9897569417953491, "rewards/frontier_aurc_reward": -0.0018961447989568115, "rewards/frontier_coverage_0": 0.012605349812656642, "rewards/frontier_coverage_1": 0.012605349812656642, "rewards/frontier_coverage_10": 0.012725694989785551, "rewards/frontier_coverage_15": 0.03210941143333912, "rewards/frontier_coverage_20": 0.07903910428285599, "rewards/frontier_coverage_25": 0.15094499289989471, "rewards/frontier_coverage_5": 0.012612746376544238, "rewards/frontier_ece_reward": -0.0022546866443008185, "rewards/frontier_entropy_batch_reward": -0.25400757491588594, "signal/accuracy_reward/centered_abs_mean": 0.1535264790058136, "signal/accuracy_reward/group_bin_occupancy": 0.19861111111111113, "signal/accuracy_reward/group_std_mean": 0.20471644699573516, "signal/accuracy_reward/group_zero_std_frac": 0.41111111640930176, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0767632395029068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0767632395029068, "signal/advantage_abs_mean": 0.09638960659503937, "signal/advantage_pre_scale_abs_mean": 0.09638960659503937, "signal/advantage_pre_scale_std": 0.1507657587528229, "signal/advantage_std": 0.1507657587528229, "signal/brier_reward/centered_abs_mean": 0.1326186940073967, "signal/brier_reward/group_bin_occupancy": 0.8368055555555556, "signal/brier_reward/group_std_mean": 0.17211284935474397, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013261870108544826, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013261870108544826, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026984479278326035, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8788194444444445, "signal/confidence_uniqueness_reward/group_std_mean": 0.04143795669078827, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026984480675309895, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026984480675309895, "signal/format_reward/centered_abs_mean": 0.01567925335839391, "signal/format_reward/group_bin_occupancy": 0.1388888888888889, "signal/format_reward/group_std_mean": 0.027652311697602273, "signal/format_reward/group_zero_std_frac": 0.8888888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007839626679196954, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007839626679196954, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023088094778358935, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6784722222222221, "signal/frontier_aurc_reward/group_std_mean": 0.00435796077363193, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.88601171632763e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.88601171632763e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18040402829647065, "signal/frontier_coverage_0/group_bin_occupancy": 0.840625, "signal/frontier_coverage_0/group_std_mean": 0.23687632083892823, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022550504421815277, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022550504421815277, "signal/frontier_coverage_1/centered_abs_mean": 0.18040402829647065, "signal/frontier_coverage_1/group_bin_occupancy": 0.840625, "signal/frontier_coverage_1/group_std_mean": 0.23687632083892823, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022550504421815277, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022550504421815277, "signal/frontier_coverage_10/centered_abs_mean": 0.17983727753162385, "signal/frontier_coverage_10/group_bin_occupancy": 0.840625, "signal/frontier_coverage_10/group_std_mean": 0.23616442382335662, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022479661041870714, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022479661041870714, "signal/frontier_coverage_15/centered_abs_mean": 0.06941870003938674, "signal/frontier_coverage_15/group_bin_occupancy": 0.8961805555555555, "signal/frontier_coverage_15/group_std_mean": 0.09146715700626373, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008677337900735438, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008677337900735438, "signal/frontier_coverage_20/centered_abs_mean": 0.06973416805267334, "signal/frontier_coverage_20/group_bin_occupancy": 0.9184027777777779, "signal/frontier_coverage_20/group_std_mean": 0.09039737284183502, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008716771146282554, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008716771146282554, "signal/frontier_coverage_25/centered_abs_mean": 0.11032099574804306, "signal/frontier_coverage_25/group_bin_occupancy": 0.8857638888888889, "signal/frontier_coverage_25/group_std_mean": 0.14418871700763702, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013790124328806996, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013790124328806996, "signal/frontier_coverage_5/centered_abs_mean": 0.18039599657058716, "signal/frontier_coverage_5/group_bin_occupancy": 0.840625, "signal/frontier_coverage_5/group_std_mean": 0.23686636984348297, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022549499990418553, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022549499990418553, "signal/frontier_ece_reward/centered_abs_mean": 0.018910813704133035, "signal/frontier_ece_reward/group_bin_occupancy": 0.8722222222222221, "signal/frontier_ece_reward/group_std_mean": 0.024173206835985183, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018910813611000775, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018910813611000775, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.312946754693985, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7586805555555556, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3856872797012329, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031294677406549454, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031294677406549454, "step": 195 }, { "calibration/aurc": 0.13533203632938168, "calibration/batch_distribution_entropy": 0.9504027952989377, "calibration/batch_entropy_100bins": 0.9457547318105052, "calibration/batch_entropy_10bins": 0.9504027952989377, "calibration/batch_entropy_50bins": 0.9554350823985063, "calibration/batch_uniqueness": 0.9465724141007226, "calibration/buffer_distribution_entropy": 0.9853970155873112, "calibration/buffer_entropy_100bins": 0.9923398769087163, "calibration/buffer_entropy_10bins": 0.9853970155873112, "calibration/buffer_entropy_50bins": 0.9911092633839509, "calibration/confidence_entropy": 0.5118639401115025, "calibration/coverage@0%": 0.02978339992651786, "calibration/coverage@1%": 0.02978339992651786, "calibration/coverage@10%": 0.4239939262423073, "calibration/coverage@15%": 0.5175246279966933, "calibration/coverage@20%": 0.8703728357215027, "calibration/coverage@25%": 0.9216995614035088, "calibration/coverage@30%": 0.9515789473684212, "calibration/coverage@5%": 0.31793581220721967, "calibration/ece": 0.1757973301190792, "calibration/mean_confidence": 0.6121883324285666, "calibration/prompt_uniqueness": 0.8583440433333885, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006597222222222232, "completions/max_length": 3650.8, "completions/max_terminated_length": 3650.8, "completions/mean_length": 752.8812622070312, "completions/mean_terminated_length": 757.93525390625, "completions/min_length": 0.0, "completions/min_terminated_length": 244.2, "epoch": 0.47999400007499904, "grad_norm": 0.00041422457434237003, "learning_rate": 2.409638554216868e-07, "loss": -0.0054, "num_tokens": 451640778.0, "reward": 1.0022491931915283, "reward_std": 0.12217179834842681, "rewards/accuracy_reward": 0.7116319417953492, "rewards/brier_reward": 0.8065296888351441, "rewards/confidence_uniqueness_reward": 0.9426613330841065, "rewards/format_reward": 0.9933159708976745, "rewards/frontier_aurc_reward": -0.0018987123388797046, "rewards/frontier_coverage_0": 0.001754038338549435, "rewards/frontier_coverage_1": 0.001754038338549435, "rewards/frontier_coverage_10": 0.0023555623716674744, "rewards/frontier_coverage_15": 0.03378410004079342, "rewards/frontier_coverage_20": 0.0899100884795189, "rewards/frontier_coverage_25": 0.16996320486068725, "rewards/frontier_coverage_5": 0.001754038338549435, "rewards/frontier_ece_reward": -0.003993240976706147, "rewards/frontier_entropy_batch_reward": -0.28486764430999756, "signal/accuracy_reward/centered_abs_mean": 0.1452473983168602, "signal/accuracy_reward/group_bin_occupancy": 0.19479166666666667, "signal/accuracy_reward/group_std_mean": 0.19380164742469788, "signal/accuracy_reward/group_zero_std_frac": 0.44166666865348814, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0726236991584301, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0726236991584301, "signal/advantage_abs_mean": 0.09101023375988007, "signal/advantage_pre_scale_abs_mean": 0.09101023375988007, "signal/advantage_pre_scale_std": 0.14412825107574462, "signal/advantage_std": 0.14412825107574462, "signal/brier_reward/centered_abs_mean": 0.12795960605144502, "signal/brier_reward/group_bin_occupancy": 0.8381944444444445, "signal/brier_reward/group_std_mean": 0.16680757701396942, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012795961275696755, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012795961275696755, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02437374070286751, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8774305555555555, "signal/confidence_uniqueness_reward/group_std_mean": 0.039960439503192904, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002437374135479331, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002437374135479331, "signal/format_reward/centered_abs_mean": 0.011984592024236917, "signal/format_reward/group_bin_occupancy": 0.1388888888888889, "signal/format_reward/group_std_mean": 0.024961976706981658, "signal/format_reward/group_zero_std_frac": 0.8888889074325561, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005992296012118458, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005992296012118458, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022553114220499994, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6885416666666668, "signal/frontier_aurc_reward/group_std_mean": 0.003917370270937681, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.81913933577016e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.81913933577016e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1716437578201294, "signal/frontier_coverage_0/group_bin_occupancy": 0.8284722222222223, "signal/frontier_coverage_0/group_std_mean": 0.22425118684768677, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021455470705404878, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021455470705404878, "signal/frontier_coverage_1/centered_abs_mean": 0.1716437578201294, "signal/frontier_coverage_1/group_bin_occupancy": 0.8284722222222223, "signal/frontier_coverage_1/group_std_mean": 0.22425118684768677, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021455470705404878, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021455470705404878, "signal/frontier_coverage_10/centered_abs_mean": 0.16941776275634765, "signal/frontier_coverage_10/group_bin_occupancy": 0.8256944444444445, "signal/frontier_coverage_10/group_std_mean": 0.22150866985321044, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021177220391109587, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021177220391109587, "signal/frontier_coverage_15/centered_abs_mean": 0.06261588633060455, "signal/frontier_coverage_15/group_bin_occupancy": 0.898263888888889, "signal/frontier_coverage_15/group_std_mean": 0.08280375897884369, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007826985907740891, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007826985907740891, "signal/frontier_coverage_20/centered_abs_mean": 0.07480958104133606, "signal/frontier_coverage_20/group_bin_occupancy": 0.9163194444444445, "signal/frontier_coverage_20/group_std_mean": 0.09603887796401978, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009351198212243616, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009351198212243616, "signal/frontier_coverage_25/centered_abs_mean": 0.11988835930824279, "signal/frontier_coverage_25/group_bin_occupancy": 0.8927083333333334, "signal/frontier_coverage_25/group_std_mean": 0.1552154928445816, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014986045192927123, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014986045192927123, "signal/frontier_coverage_5/centered_abs_mean": 0.1716437578201294, "signal/frontier_coverage_5/group_bin_occupancy": 0.8284722222222223, "signal/frontier_coverage_5/group_std_mean": 0.22425118684768677, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021455470705404878, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021455470705404878, "signal/frontier_ece_reward/centered_abs_mean": 0.01908930353820324, "signal/frontier_ece_reward/group_bin_occupancy": 0.8739583333333334, "signal/frontier_ece_reward/group_std_mean": 0.023905428871512414, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019089303910732268, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019089303910732268, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.328853166103363, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39871172308921815, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03288531787693501, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03288531787693501, "step": 200 }, { "epoch": 0.47999400007499904, "eval_calibration/aurc": 0.14756764044678702, "eval_calibration/batch_distribution_entropy": 0.9258539583207644, "eval_calibration/batch_entropy_100bins": 0.705329766878307, "eval_calibration/batch_entropy_10bins": 0.9258539583207644, "eval_calibration/batch_entropy_50bins": 0.7811867066286015, "eval_calibration/batch_uniqueness": 0.8942650071540063, "eval_calibration/buffer_distribution_entropy": 0.9848900187220755, "eval_calibration/buffer_entropy_100bins": 0.9920892460675286, "eval_calibration/buffer_entropy_10bins": 0.9848900187220755, "eval_calibration/buffer_entropy_50bins": 0.9908194394047567, "eval_calibration/confidence_entropy": 0.48096033335039917, "eval_calibration/coverage@0%": 0.25739247311827956, "eval_calibration/coverage@1%": 0.25739247311827956, "eval_calibration/coverage@10%": 0.4065860215053763, "eval_calibration/coverage@15%": 0.6117271505376344, "eval_calibration/coverage@20%": 0.70127688172043, "eval_calibration/coverage@25%": 0.936491935483871, "eval_calibration/coverage@30%": 0.9946236559139785, "eval_calibration/coverage@5%": 0.25739247311827956, "eval_calibration/ece": 0.22866607442963627, "eval_calibration/mean_confidence": 0.5914768839699495, "eval_calibration/prompt_uniqueness": 0.8942650071540063, "eval_completions/clipped_ratio": 0.00434027777777779, "eval_completions/max_length": 2311.6666666666665, "eval_completions/max_terminated_length": 2311.6666666666665, "eval_completions/mean_length": 765.7859497070312, "eval_completions/mean_terminated_length": 769.0821126302084, "eval_completions/min_length": 147.33333333333334, "eval_completions/min_terminated_length": 270.8333333333333, "eval_loss": 0.0, "eval_num_tokens": 451640778.0, "eval_reward": 0.9159158170223236, "eval_reward_std": 0.2335003837943077, "eval_rewards/accuracy_reward": 0.6875, "eval_rewards/brier_reward": 0.8085997502009074, "eval_rewards/confidence_uniqueness_reward": 0.8914510905742645, "eval_rewards/format_reward": 0.9939236144224802, "eval_rewards/frontier_aurc_reward": -0.0018178720492869616, "eval_rewards/frontier_coverage_0": 0.019008846589713357, "eval_rewards/frontier_coverage_1": 0.019008846589713357, "eval_rewards/frontier_coverage_10": 0.01966132320618878, "eval_rewards/frontier_coverage_15": 0.039734075466791786, "eval_rewards/frontier_coverage_20": 0.09554279471437137, "eval_rewards/frontier_coverage_25": 0.17320440957943598, "eval_rewards/frontier_coverage_5": 0.019010902382433414, "eval_rewards/frontier_ece_reward": -0.0020065721619175747, "eval_rewards/frontier_entropy_batch_reward": -0.9939236144224802, "eval_runtime": 181.543, "eval_samples_per_second": 5.508, "eval_signal/accuracy_reward/centered_abs_mean": 0.4176432291666667, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.463163028160731, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20882161458333334, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20882161458333334, "eval_signal/advantage_abs_mean": 0.20389158527056375, "eval_signal/advantage_pre_scale_abs_mean": 0.20389158527056375, "eval_signal/advantage_pre_scale_std": 0.23148142298062643, "eval_signal/advantage_std": 0.23148142298062643, "eval_signal/brier_reward/centered_abs_mean": 0.17776375015576681, "eval_signal/brier_reward/group_bin_occupancy": 0.8645833333333334, "eval_signal/brier_reward/group_std_mean": 0.227944349249204, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01777637532601754, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01777637532601754, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04837593622505665, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.40624999999999994, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0713024524350961, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004837593606983622, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004837593606983622, "eval_signal/format_reward/centered_abs_mean": 0.01177300326526165, "eval_signal/format_reward/group_bin_occupancy": 0.14930555555555555, "eval_signal/format_reward/group_std_mean": 0.034373246133327484, "eval_signal/format_reward/group_zero_std_frac": 0.8055555721124014, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.005886501632630825, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.005886501632630825, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0030702337777862945, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.5902777777777778, "eval_signal/frontier_aurc_reward/group_std_mean": 0.006556036416441202, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.83779224648606e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.83779224648606e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.26045608272155124, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9444444444444443, "eval_signal/frontier_coverage_0/group_std_mean": 0.3647051453590393, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003255701197000841, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003255701197000841, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.26045608272155124, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9444444444444443, "eval_signal/frontier_coverage_1/group_std_mean": 0.3647051453590393, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003255701197000841, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003255701197000841, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.25206231077512103, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9444444444444443, "eval_signal/frontier_coverage_10/group_std_mean": 0.35428506632645923, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003150779055431485, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003150779055431485, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.07933254291613896, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9097222222222222, "eval_signal/frontier_coverage_15/group_std_mean": 0.11005314812064171, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009916568330178659, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009916568330178659, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.12251939376195271, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9409722222222223, "eval_signal/frontier_coverage_20/group_std_mean": 0.15402295937140784, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015314924918736021, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015314924918736021, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.22701545556386313, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_25/group_std_mean": 0.2766217887401581, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028376932411144176, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028376932411144176, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.2604496479034424, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9444444444444443, "eval_signal/frontier_coverage_5/group_std_mean": 0.36469681064287823, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032556206764032445, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032556206764032445, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.0270277534921964, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9409722222222222, "eval_signal/frontier_ece_reward/group_std_mean": 0.034523426865537964, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002702775450112919, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002702775450112919, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.01177300326526165, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.14930555555555555, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.034373246133327484, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8055555721124014, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0011773003886143367, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0011773003886143367, "eval_steps_per_second": 0.033, "step": 200 }, { "calibration/aurc": 0.1652033711867475, "calibration/batch_distribution_entropy": 0.9384595336180503, "calibration/batch_entropy_100bins": 0.9400339651108552, "calibration/batch_entropy_10bins": 0.9384595336180503, "calibration/batch_entropy_50bins": 0.9475336542177166, "calibration/batch_uniqueness": 0.9436695127060112, "calibration/buffer_distribution_entropy": 0.9847586785563566, "calibration/buffer_entropy_100bins": 0.9920408823055707, "calibration/buffer_entropy_10bins": 0.9847586785563566, "calibration/buffer_entropy_50bins": 0.9907514046533752, "calibration/confidence_entropy": 0.4783915000591916, "calibration/coverage@0%": 0.01204360634059349, "calibration/coverage@1%": 0.01204360634059349, "calibration/coverage@10%": 0.2860696019089487, "calibration/coverage@15%": 0.4156604098828991, "calibration/coverage@20%": 0.8064645320617825, "calibration/coverage@25%": 0.9036748593935308, "calibration/coverage@30%": 0.9464806869956538, "calibration/coverage@5%": 0.08500383239346383, "calibration/ece": 0.12629128286363533, "calibration/mean_confidence": 0.6265111829809916, "calibration/prompt_uniqueness": 0.8555825096487398, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006076388888888884, "completions/max_length": 3488.0, "completions/max_terminated_length": 3488.0, "completions/mean_length": 771.4147583007813, "completions/mean_terminated_length": 776.1726318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 231.6, "epoch": 0.491993850076874, "grad_norm": 0.00033893538056872785, "learning_rate": 9.036144578313253e-08, "loss": -0.0041, "num_tokens": 463593428.0, "reward": 1.0165271043777466, "reward_std": 0.12039815932512284, "rewards/accuracy_reward": 0.7393229007720947, "rewards/brier_reward": 0.8207941651344299, "rewards/confidence_uniqueness_reward": 0.9416178464889526, "rewards/format_reward": 0.9938368082046509, "rewards/frontier_aurc_reward": -0.0013881307444535197, "rewards/frontier_coverage_0": 0.0027549955993890762, "rewards/frontier_coverage_1": 0.0027549955993890762, "rewards/frontier_coverage_10": 0.005501348234247416, "rewards/frontier_coverage_15": 0.04494566917419433, "rewards/frontier_coverage_20": 0.11320073753595353, "rewards/frontier_coverage_25": 0.20637257397174835, "rewards/frontier_coverage_5": 0.0027576935943216087, "rewards/frontier_ece_reward": -0.004469462623819709, "rewards/frontier_entropy_batch_reward": -0.30558276176452637, "signal/accuracy_reward/centered_abs_mean": 0.14855143129825593, "signal/accuracy_reward/group_bin_occupancy": 0.19895833333333332, "signal/accuracy_reward/group_std_mean": 0.2020564168691635, "signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07427571564912797, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07427571564912797, "signal/advantage_abs_mean": 0.08845392167568207, "signal/advantage_pre_scale_abs_mean": 0.08845392167568207, "signal/advantage_pre_scale_std": 0.14208076894283295, "signal/advantage_std": 0.14208076894283295, "signal/brier_reward/centered_abs_mean": 0.12419998794794082, "signal/brier_reward/group_bin_occupancy": 0.8274305555555556, "signal/brier_reward/group_std_mean": 0.1606125205755234, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01241999827325344, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01241999827325344, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02462100312113762, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8798611111111112, "signal/confidence_uniqueness_reward/group_std_mean": 0.039499569684267044, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024621004471555353, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024621004471555353, "signal/format_reward/centered_abs_mean": 0.0109103734488599, "signal/format_reward/group_bin_occupancy": 0.13784722222222223, "signal/format_reward/group_std_mean": 0.02283493857830763, "signal/format_reward/group_zero_std_frac": 0.8972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00545518672442995, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00545518672442995, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016165346140041947, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6725694444444444, "signal/frontier_aurc_reward/group_std_mean": 0.0028643927304074167, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0206683984724805e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0206683984724805e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17496979832649232, "signal/frontier_coverage_0/group_bin_occupancy": 0.814236111111111, "signal/frontier_coverage_0/group_std_mean": 0.2312620609998703, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021871224977076053, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021871224977076053, "signal/frontier_coverage_1/centered_abs_mean": 0.17496979832649232, "signal/frontier_coverage_1/group_bin_occupancy": 0.814236111111111, "signal/frontier_coverage_1/group_std_mean": 0.2312620609998703, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021871224977076053, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021871224977076053, "signal/frontier_coverage_10/centered_abs_mean": 0.16565968990325927, "signal/frontier_coverage_10/group_bin_occupancy": 0.8069444444444445, "signal/frontier_coverage_10/group_std_mean": 0.21950293183326722, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002070746128447354, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002070746128447354, "signal/frontier_coverage_15/centered_abs_mean": 0.0636880062520504, "signal/frontier_coverage_15/group_bin_occupancy": 0.9159722222222222, "signal/frontier_coverage_15/group_std_mean": 0.08212085962295532, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007961000897921622, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007961000897921622, "signal/frontier_coverage_20/centered_abs_mean": 0.07905065417289733, "signal/frontier_coverage_20/group_bin_occupancy": 0.9180555555555555, "signal/frontier_coverage_20/group_std_mean": 0.1002379298210144, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009881331818178297, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009881331818178297, "signal/frontier_coverage_25/centered_abs_mean": 0.12090887576341629, "signal/frontier_coverage_25/group_bin_occupancy": 0.8913194444444444, "signal/frontier_coverage_25/group_std_mean": 0.15563631057739258, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015113609610125423, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015113609610125423, "signal/frontier_coverage_5/centered_abs_mean": 0.17495956420898437, "signal/frontier_coverage_5/group_bin_occupancy": 0.814236111111111, "signal/frontier_coverage_5/group_std_mean": 0.2312490999698639, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021869946271181107, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021869946271181107, "signal/frontier_ece_reward/centered_abs_mean": 0.01855614297091961, "signal/frontier_ece_reward/group_bin_occupancy": 0.8444444444444444, "signal/frontier_ece_reward/group_std_mean": 0.023571832850575448, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001855614292435348, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001855614292435348, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3295231759548187, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.753125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39651084542274473, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03295231983065605, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03295231983065605, "step": 205 }, { "calibration/aurc": 0.07742026917061483, "calibration/batch_distribution_entropy": 0.9536937703254748, "calibration/batch_entropy_100bins": 0.9492115543702568, "calibration/batch_entropy_10bins": 0.9536937703254748, "calibration/batch_entropy_50bins": 0.9579820664320953, "calibration/batch_uniqueness": 0.9475172437688495, "calibration/buffer_distribution_entropy": 0.9842747561446203, "calibration/buffer_entropy_100bins": 0.9917916027464883, "calibration/buffer_entropy_10bins": 0.9842747561446203, "calibration/buffer_entropy_50bins": 0.9904588388299497, "calibration/confidence_entropy": 0.49376543330122047, "calibration/coverage@0%": 0.09232020559648955, "calibration/coverage@1%": 0.09232020559648955, "calibration/coverage@10%": 0.7053527082507439, "calibration/coverage@15%": 0.8343790867542458, "calibration/coverage@20%": 0.9407132995229177, "calibration/coverage@25%": 0.9851657940663175, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.5143739832952852, "calibration/ece": 0.15782109592263618, "calibration/mean_confidence": 0.6163736283766634, "calibration/prompt_uniqueness": 0.8510770487877534, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004484953703703683, "completions/max_length": 3489.0, "completions/max_terminated_length": 3489.0, "completions/mean_length": 780.5936075846354, "completions/mean_terminated_length": 784.121337890625, "completions/min_length": 0.0, "completions/min_terminated_length": 227.0, "epoch": 0.49919376007799904, "num_tokens": 470854939.0, "reward": 0.9987632036209106, "reward_std": 0.11778175334135692, "rewards/accuracy_reward": 0.7009548544883728, "rewards/brier_reward": 0.8073068459828695, "rewards/confidence_uniqueness_reward": 0.9438951214154562, "rewards/format_reward": 0.9955150485038757, "rewards/frontier_aurc_reward": -0.001374229167898496, "rewards/frontier_coverage_0": 0.009927504695951939, "rewards/frontier_coverage_1": 0.009927504695951939, "rewards/frontier_coverage_10": 0.011239175374309221, "rewards/frontier_coverage_15": 0.04281615341703097, "rewards/frontier_coverage_20": 0.10291850566864014, "rewards/frontier_coverage_25": 0.1833156297604243, "rewards/frontier_coverage_5": 0.009930253960192204, "rewards/frontier_ece_reward": -0.003355810030673941, "rewards/frontier_entropy_batch_reward": -0.2886508007844289, "signal/accuracy_reward/centered_abs_mean": 0.14790401111046472, "signal/accuracy_reward/group_bin_occupancy": 0.19618055555555555, "signal/accuracy_reward/group_std_mean": 0.19730964303016663, "signal/accuracy_reward/group_zero_std_frac": 0.4305555621782939, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07395200555523236, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07395200555523236, "signal/advantage_abs_mean": 0.08715297033389409, "signal/advantage_pre_scale_abs_mean": 0.08715297033389409, "signal/advantage_pre_scale_std": 0.1391968379418055, "signal/advantage_std": 0.1391968379418055, "signal/brier_reward/centered_abs_mean": 0.1294451653957367, "signal/brier_reward/group_bin_occupancy": 0.84375, "signal/brier_reward/group_std_mean": 0.16660779217878977, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012944516415397326, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012944516415397326, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02156602032482624, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8703703703703703, "signal/confidence_uniqueness_reward/group_std_mean": 0.03804971898595492, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021566021411369243, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021566021411369243, "signal/format_reward/centered_abs_mean": 0.008563006296753883, "signal/format_reward/group_bin_occupancy": 0.1394675925925926, "signal/format_reward/group_std_mean": 0.02229359808067481, "signal/format_reward/group_zero_std_frac": 0.8842592835426331, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004281503148376942, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004281503148376942, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017085896106436849, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6903935185185185, "signal/frontier_aurc_reward/group_std_mean": 0.0031021018512547016, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1357368799120497e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1357368799120497e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18459606170654297, "signal/frontier_coverage_0/group_bin_occupancy": 0.8287037037037037, "signal/frontier_coverage_0/group_std_mean": 0.24103171626726785, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023074508644640446, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023074508644640446, "signal/frontier_coverage_1/centered_abs_mean": 0.18459606170654297, "signal/frontier_coverage_1/group_bin_occupancy": 0.8287037037037037, "signal/frontier_coverage_1/group_std_mean": 0.24103171626726785, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023074508644640446, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023074508644640446, "signal/frontier_coverage_10/centered_abs_mean": 0.17488996187845865, "signal/frontier_coverage_10/group_bin_occupancy": 0.8275462962962963, "signal/frontier_coverage_10/group_std_mean": 0.22865218917528787, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021861245234807334, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021861245234807334, "signal/frontier_coverage_15/centered_abs_mean": 0.06289837509393692, "signal/frontier_coverage_15/group_bin_occupancy": 0.9097222222222223, "signal/frontier_coverage_15/group_std_mean": 0.08136197924613953, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007862297352403402, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007862297352403402, "signal/frontier_coverage_20/centered_abs_mean": 0.07549887150526047, "signal/frontier_coverage_20/group_bin_occupancy": 0.9224537037037037, "signal/frontier_coverage_20/group_std_mean": 0.09611385067303975, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009437358821742237, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009437358821742237, "signal/frontier_coverage_25/centered_abs_mean": 0.11583262433608373, "signal/frontier_coverage_25/group_bin_occupancy": 0.8883101851851851, "signal/frontier_coverage_25/group_std_mean": 0.14927472174167633, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014479078430061538, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014479078430061538, "signal/frontier_coverage_5/centered_abs_mean": 0.18458310763041177, "signal/frontier_coverage_5/group_bin_occupancy": 0.8287037037037037, "signal/frontier_coverage_5/group_std_mean": 0.24101491769154867, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002307288891946276, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002307288891946276, "signal/frontier_ece_reward/centered_abs_mean": 0.01901736669242382, "signal/frontier_ece_reward/group_bin_occupancy": 0.8396990740740741, "signal/frontier_ece_reward/group_std_mean": 0.024036493773261707, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019017367934187253, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019017367934187253, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3173823555310567, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7534722222222222, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3859201769034068, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031738235925634704, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031738235925634704, "step": 208, "total_flos": 0.0, "train_loss": -0.01055129385685387, "train_runtime": 40426.5299, "train_samples_per_second": 0.371, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 208, "num_input_tokens_seen": 470854939, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }