{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.49919376007799904, "eval_steps": 50, "global_step": 208, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.49341160918807886, "calibration/batch_distribution_entropy": 0.2677708973812775, "calibration/confidence_entropy": 0.2110494430232559, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4607692188269585, "calibration/mean_confidence": 0.9172499545900774, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020659722222222232, "completions/max_length": 4003.8, "completions/max_terminated_length": 4003.8, "completions/mean_length": 512.6473083496094, "completions/mean_terminated_length": 523.4645141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011999850001874977, "grad_norm": 0.004103545099496841, "learning_rate": 5.952380952380953e-07, "loss": 0.0037, "num_tokens": 9019905.0, "reward": 0.5161375880241394, "reward_std": 0.4549876987934113, "rewards/accuracy_reward": 0.2603298544883728, "rewards/brier_reward": 0.31216180324554443, "rewards/confidence_uniqueness_reward": 0.290114027261734, "rewards/format_reward": 0.5964409589767456, "rewards/frontier_coverage_0": 0.27497095465660093, "rewards/frontier_coverage_1": 0.27497095465660093, "rewards/frontier_coverage_10": 0.27497095465660093, "rewards/frontier_coverage_15": 0.27497095465660093, "rewards/frontier_coverage_20": 0.27497095465660093, "rewards/frontier_coverage_25": 0.27497095465660093, "rewards/frontier_coverage_5": 0.27497095465660093, "signal/accuracy_reward/centered_abs_mean": 0.30428059697151183, "signal/accuracy_reward/group_std_mean": 0.3650037467479706, "signal/accuracy_reward/group_zero_std_frac": 0.0916666679084301, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15214029848575591, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15214029848575591, "signal/advantage_abs_mean": 0.3943765044212341, "signal/advantage_pre_scale_abs_mean": 0.3943765044212341, "signal/advantage_pre_scale_std": 0.4582944571971893, "signal/advantage_std": 0.4582944571971893, "signal/brier_reward/centered_abs_mean": 0.3165676236152649, "signal/brier_reward/group_std_mean": 0.370278537273407, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03165676258504391, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03165676258504391, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.23938581049442292, "signal/confidence_uniqueness_reward/group_std_mean": 0.2900544762611389, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023938581719994544, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023938581719994544, "signal/format_reward/centered_abs_mean": 0.43942599892616274, "signal/format_reward/group_std_mean": 0.474114316701889, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.21971299946308137, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.21971299946308137, "signal/frontier_coverage_0/centered_abs_mean": 0.3062441885471344, "signal/frontier_coverage_0/group_std_mean": 0.36501355171203614, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00437929192557931, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00437929192557931, "signal/frontier_coverage_1/centered_abs_mean": 0.3062441885471344, "signal/frontier_coverage_1/group_std_mean": 0.36501355171203614, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00437929192557931, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00437929192557931, "signal/frontier_coverage_10/centered_abs_mean": 0.3062441885471344, "signal/frontier_coverage_10/group_std_mean": 0.36501355171203614, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00437929192557931, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00437929192557931, "signal/frontier_coverage_15/centered_abs_mean": 0.3062441885471344, "signal/frontier_coverage_15/group_std_mean": 0.36501355171203614, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00437929192557931, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00437929192557931, "signal/frontier_coverage_20/centered_abs_mean": 0.3062441885471344, "signal/frontier_coverage_20/group_std_mean": 0.36501355171203614, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00437929192557931, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00437929192557931, "signal/frontier_coverage_25/centered_abs_mean": 0.3062441885471344, "signal/frontier_coverage_25/group_std_mean": 0.36501355171203614, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00437929192557931, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00437929192557931, "signal/frontier_coverage_5/centered_abs_mean": 0.3062441885471344, "signal/frontier_coverage_5/group_std_mean": 0.36501355171203614, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00437929192557931, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00437929192557931, "step": 5 }, { "calibration/aurc": 0.517125217183162, "calibration/batch_distribution_entropy": 0.27050996094866103, "calibration/confidence_entropy": 0.21723002440868527, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.47366067204089707, "calibration/mean_confidence": 0.91929693281871, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017795138888888885, "completions/max_length": 3914.2, "completions/max_terminated_length": 3914.2, "completions/mean_length": 473.3821228027344, "completions/mean_terminated_length": 482.12042236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 29.2, "epoch": 0.023999700003749954, "grad_norm": 0.0036878257524222136, "learning_rate": 1.1904761904761906e-06, "loss": 0.0018, "num_tokens": 17555987.0, "reward": 0.6115049719810486, "reward_std": 0.42418900728225706, "rewards/accuracy_reward": 0.29661458134651186, "rewards/brier_reward": 0.3612636148929596, "rewards/confidence_uniqueness_reward": 0.35831656455993655, "rewards/format_reward": 0.7197048544883728, "rewards/frontier_coverage_0": 0.3135582983493805, "rewards/frontier_coverage_1": 0.3135582983493805, "rewards/frontier_coverage_10": 0.3135582983493805, "rewards/frontier_coverage_15": 0.3135582983493805, "rewards/frontier_coverage_20": 0.3135582983493805, "rewards/frontier_coverage_25": 0.3135582983493805, "rewards/frontier_coverage_5": 0.3135582983493805, "signal/accuracy_reward/centered_abs_mean": 0.3206434488296509, "signal/accuracy_reward/group_std_mean": 0.37941792607307434, "signal/accuracy_reward/group_zero_std_frac": 0.08055555671453477, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16032172441482545, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.16032172441482545, "signal/advantage_abs_mean": 0.34988189339637754, "signal/advantage_pre_scale_abs_mean": 0.34988189339637754, "signal/advantage_pre_scale_std": 0.42775319814682006, "signal/advantage_std": 0.42775319814682006, "signal/brier_reward/centered_abs_mean": 0.3171759068965912, "signal/brier_reward/group_std_mean": 0.3707857489585876, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03171758912503719, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03171758912503719, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.22490560114383698, "signal/confidence_uniqueness_reward/group_std_mean": 0.28029904961586, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02249056026339531, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02249056026339531, "signal/format_reward/centered_abs_mean": 0.35055881142616274, "signal/format_reward/group_std_mean": 0.41676204204559325, "signal/format_reward/group_zero_std_frac": 0.00555555559694767, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.17527940571308137, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.17527940571308137, "signal/frontier_coverage_0/centered_abs_mean": 0.3156200468540192, "signal/frontier_coverage_0/group_std_mean": 0.3727039873600006, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004513366613537073, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004513366613537073, "signal/frontier_coverage_1/centered_abs_mean": 0.3156200468540192, "signal/frontier_coverage_1/group_std_mean": 0.3727039873600006, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004513366613537073, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004513366613537073, "signal/frontier_coverage_10/centered_abs_mean": 0.3156200468540192, "signal/frontier_coverage_10/group_std_mean": 0.3727039873600006, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004513366613537073, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004513366613537073, "signal/frontier_coverage_15/centered_abs_mean": 0.3156200468540192, "signal/frontier_coverage_15/group_std_mean": 0.3727039873600006, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004513366613537073, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004513366613537073, "signal/frontier_coverage_20/centered_abs_mean": 0.3156200468540192, "signal/frontier_coverage_20/group_std_mean": 0.3727039873600006, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004513366613537073, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004513366613537073, "signal/frontier_coverage_25/centered_abs_mean": 0.3156200468540192, "signal/frontier_coverage_25/group_std_mean": 0.3727039873600006, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004513366613537073, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004513366613537073, "signal/frontier_coverage_5/centered_abs_mean": 0.3156200468540192, "signal/frontier_coverage_5/group_std_mean": 0.3727039873600006, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004513366613537073, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004513366613537073, "step": 10 }, { "calibration/aurc": 0.5414595000464721, "calibration/batch_distribution_entropy": 0.2774047038997945, "calibration/confidence_entropy": 0.22813624148006326, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5205185786094126, "calibration/mean_confidence": 0.9187366141594058, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012239583333333326, "completions/max_length": 3908.2, "completions/max_terminated_length": 3908.2, "completions/mean_length": 412.8827331542969, "completions/mean_terminated_length": 418.0228271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 38.6, "epoch": 0.03599955000562493, "grad_norm": 0.0015186438104137778, "learning_rate": 1.7857142857142859e-06, "loss": -0.0113, "num_tokens": 25414380.0, "reward": 0.7417026281356811, "reward_std": 0.31185888051986693, "rewards/accuracy_reward": 0.30269097089767455, "rewards/brier_reward": 0.4077360093593597, "rewards/confidence_uniqueness_reward": 0.5074092388153076, "rewards/format_reward": 0.93125, "rewards/frontier_coverage_0": 0.33184386491775514, "rewards/frontier_coverage_1": 0.33184386491775514, "rewards/frontier_coverage_10": 0.33184386491775514, "rewards/frontier_coverage_15": 0.33184386491775514, "rewards/frontier_coverage_20": 0.33184386491775514, "rewards/frontier_coverage_25": 0.33184386491775514, "rewards/frontier_coverage_5": 0.33184386491775514, "signal/accuracy_reward/centered_abs_mean": 0.31045464873313905, "signal/accuracy_reward/group_std_mean": 0.3683877825737, "signal/accuracy_reward/group_zero_std_frac": 0.10000000223517418, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15522732436656952, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15522732436656952, "signal/advantage_abs_mean": 0.24663678109645842, "signal/advantage_pre_scale_abs_mean": 0.24663678109645842, "signal/advantage_pre_scale_std": 0.3215538918972015, "signal/advantage_std": 0.3215538918972015, "signal/brier_reward/centered_abs_mean": 0.29533362984657285, "signal/brier_reward/group_std_mean": 0.34695218205451966, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029533364251255988, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.029533364251255988, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.179153373837471, "signal/confidence_uniqueness_reward/group_std_mean": 0.22881248593330383, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.017915337532758712, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.017915337532758712, "signal/format_reward/centered_abs_mean": 0.11593967080116271, "signal/format_reward/group_std_mean": 0.19709881097078324, "signal/format_reward/group_zero_std_frac": 0.280555559694767, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05796983540058136, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.05796983540058136, "signal/frontier_coverage_0/centered_abs_mean": 0.3037257790565491, "signal/frontier_coverage_0/group_std_mean": 0.3585019886493683, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0043432785663753744, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0043432785663753744, "signal/frontier_coverage_1/centered_abs_mean": 0.3037257790565491, "signal/frontier_coverage_1/group_std_mean": 0.3585019886493683, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0043432785663753744, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0043432785663753744, "signal/frontier_coverage_10/centered_abs_mean": 0.3037257790565491, "signal/frontier_coverage_10/group_std_mean": 0.3585019886493683, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0043432785663753744, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0043432785663753744, "signal/frontier_coverage_15/centered_abs_mean": 0.3037257790565491, "signal/frontier_coverage_15/group_std_mean": 0.3585019886493683, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0043432785663753744, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0043432785663753744, "signal/frontier_coverage_20/centered_abs_mean": 0.3037257790565491, "signal/frontier_coverage_20/group_std_mean": 0.3585019886493683, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0043432785663753744, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0043432785663753744, "signal/frontier_coverage_25/centered_abs_mean": 0.3037257790565491, "signal/frontier_coverage_25/group_std_mean": 0.3585019886493683, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0043432785663753744, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0043432785663753744, "signal/frontier_coverage_5/centered_abs_mean": 0.3037257790565491, "signal/frontier_coverage_5/group_std_mean": 0.3585019886493683, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0043432785663753744, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0043432785663753744, "step": 15 }, { "calibration/aurc": 0.4847516582072403, "calibration/batch_distribution_entropy": 0.3599406528071635, "calibration/buffer_distribution_entropy": 0.2936503471332332, "calibration/confidence_entropy": 0.29304720208371154, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4180637786651644, "calibration/mean_confidence": 0.8936602458423843, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010763888888888884, "completions/max_length": 3824.4, "completions/max_terminated_length": 3824.4, "completions/mean_length": 432.4386352539062, "completions/mean_terminated_length": 437.1669494628906, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.04799940000749991, "grad_norm": 0.0023721419274806976, "learning_rate": 2.380952380952381e-06, "loss": -0.0075, "num_tokens": 33509769.0, "reward": 0.8219772577285767, "reward_std": 0.23951346278190613, "rewards/accuracy_reward": 0.40755208730697634, "rewards/brier_reward": 0.5194338917732239, "rewards/confidence_uniqueness_reward": 0.5695141077041626, "rewards/format_reward": 0.9841145753860474, "rewards/frontier_coverage_0": 0.17231892738491297, "rewards/frontier_coverage_1": 0.17231892738491297, "rewards/frontier_coverage_10": 0.17231892738491297, "rewards/frontier_coverage_15": 0.17231892738491297, "rewards/frontier_coverage_20": 0.17231892738491297, "rewards/frontier_coverage_25": 0.17231892738491297, "rewards/frontier_coverage_5": 0.17231892738491297, "signal/accuracy_reward/centered_abs_mean": 0.28974066972732543, "signal/accuracy_reward/group_std_mean": 0.3587852954864502, "signal/accuracy_reward/group_zero_std_frac": 0.07777778059244156, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14487033486366271, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14487033486366271, "signal/advantage_abs_mean": 0.1873514622449875, "signal/advantage_pre_scale_abs_mean": 0.1873514622449875, "signal/advantage_pre_scale_std": 0.24830279350280762, "signal/advantage_std": 0.24830279350280762, "signal/brier_reward/centered_abs_mean": 0.2584921300411224, "signal/brier_reward/group_std_mean": 0.31671356558799746, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025849214196205138, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.025849214196205138, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.17959446012973784, "signal/confidence_uniqueness_reward/group_std_mean": 0.21536695957183838, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01795944608747959, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01795944608747959, "signal/format_reward/centered_abs_mean": 0.02878146693110466, "signal/format_reward/group_std_mean": 0.059326070547103885, "signal/format_reward/group_zero_std_frac": 0.7416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01439073346555233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01439073346555233, "signal/frontier_coverage_0/centered_abs_mean": 0.12399653047323227, "signal/frontier_coverage_0/group_std_mean": 0.16216810792684555, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017731502826791256, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017731502826791256, "signal/frontier_coverage_1/centered_abs_mean": 0.12399653047323227, "signal/frontier_coverage_1/group_std_mean": 0.16216810792684555, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017731502826791256, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017731502826791256, "signal/frontier_coverage_10/centered_abs_mean": 0.12399653047323227, "signal/frontier_coverage_10/group_std_mean": 0.16216810792684555, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017731502826791256, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017731502826791256, "signal/frontier_coverage_15/centered_abs_mean": 0.12399653047323227, "signal/frontier_coverage_15/group_std_mean": 0.16216810792684555, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017731502826791256, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017731502826791256, "signal/frontier_coverage_20/centered_abs_mean": 0.12399653047323227, "signal/frontier_coverage_20/group_std_mean": 0.16216810792684555, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017731502826791256, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017731502826791256, "signal/frontier_coverage_25/centered_abs_mean": 0.12399653047323227, "signal/frontier_coverage_25/group_std_mean": 0.16216810792684555, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017731502826791256, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017731502826791256, "signal/frontier_coverage_5/centered_abs_mean": 0.12399653047323227, "signal/frontier_coverage_5/group_std_mean": 0.16216810792684555, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017731502826791256, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017731502826791256, "step": 20 }, { "calibration/aurc": 0.38634314240054995, "calibration/batch_distribution_entropy": 0.46526513959059035, "calibration/buffer_distribution_entropy": 0.32973422005179354, "calibration/confidence_entropy": 0.3478899647822179, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.00783289817232376, "calibration/coverage@25%": 0.13632165978831523, "calibration/coverage@30%": 0.225260832701648, "calibration/coverage@5%": 0.0, "calibration/ece": 0.308634197428995, "calibration/mean_confidence": 0.8684335224123025, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009722222222222233, "completions/max_length": 3845.2, "completions/max_terminated_length": 3845.2, "completions/mean_length": 477.2792663574219, "completions/mean_terminated_length": 481.96564331054685, "completions/min_length": 0.0, "completions/min_terminated_length": 96.4, "epoch": 0.05999925000937488, "grad_norm": 0.0009343558340333402, "learning_rate": 2.9761904761904763e-06, "loss": -0.0071, "num_tokens": 42132474.0, "reward": 0.8692076325416564, "reward_std": 0.21406486630439758, "rewards/accuracy_reward": 0.5006076395511627, "rewards/brier_reward": 0.6141545534133911, "rewards/confidence_uniqueness_reward": 0.6311920523643494, "rewards/format_reward": 0.9878472208976745, "rewards/frontier_coverage_0": 0.0044506344594992696, "rewards/frontier_coverage_1": 0.0044506344594992696, "rewards/frontier_coverage_10": 0.0044506344594992696, "rewards/frontier_coverage_15": 0.0044506344594992696, "rewards/frontier_coverage_20": 0.0044506344594992696, "rewards/frontier_coverage_25": 0.0044506344594992696, "rewards/frontier_coverage_5": 0.0044506344594992696, "signal/accuracy_reward/centered_abs_mean": 0.28586697578430176, "signal/accuracy_reward/group_std_mean": 0.35248740911483767, "signal/accuracy_reward/group_zero_std_frac": 0.09444444701075554, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14293348789215088, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14293348789215088, "signal/advantage_abs_mean": 0.16929234862327575, "signal/advantage_pre_scale_abs_mean": 0.16929234862327575, "signal/advantage_pre_scale_std": 0.22722831070423127, "signal/advantage_std": 0.22722831070423127, "signal/brier_reward/centered_abs_mean": 0.2289237290620804, "signal/brier_reward/group_std_mean": 0.2827379047870636, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02289237417280674, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02289237417280674, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1367410659790039, "signal/confidence_uniqueness_reward/group_std_mean": 0.16966789066791535, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013674106262624264, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013674106262624264, "signal/format_reward/centered_abs_mean": 0.021506076492369176, "signal/format_reward/group_std_mean": 0.04259942732751369, "signal/format_reward/group_zero_std_frac": 0.8194444656372071, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010753038246184588, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010753038246184588, "signal/frontier_coverage_0/centered_abs_mean": 0.02831830345094204, "signal/frontier_coverage_0/group_std_mean": 0.0470881313085556, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0004049517388921231, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0004049517388921231, "signal/frontier_coverage_1/centered_abs_mean": 0.02831830345094204, "signal/frontier_coverage_1/group_std_mean": 0.0470881313085556, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0004049517388921231, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0004049517388921231, "signal/frontier_coverage_10/centered_abs_mean": 0.02831830345094204, "signal/frontier_coverage_10/group_std_mean": 0.0470881313085556, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0004049517388921231, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0004049517388921231, "signal/frontier_coverage_15/centered_abs_mean": 0.02831830345094204, "signal/frontier_coverage_15/group_std_mean": 0.0470881313085556, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0004049517388921231, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0004049517388921231, "signal/frontier_coverage_20/centered_abs_mean": 0.02831830345094204, "signal/frontier_coverage_20/group_std_mean": 0.0470881313085556, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0004049517388921231, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0004049517388921231, "signal/frontier_coverage_25/centered_abs_mean": 0.02831830345094204, "signal/frontier_coverage_25/group_std_mean": 0.0470881313085556, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0004049517388921231, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0004049517388921231, "signal/frontier_coverage_5/centered_abs_mean": 0.02831830345094204, "signal/frontier_coverage_5/group_std_mean": 0.0470881313085556, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0004049517388921231, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0004049517388921231, "step": 25 }, { "calibration/aurc": 0.31364832770276674, "calibration/batch_distribution_entropy": 0.499710605804783, "calibration/buffer_distribution_entropy": 0.38684449581318747, "calibration/confidence_entropy": 0.422717480258496, "calibration/coverage@0%": 0.010080906132506668, "calibration/coverage@1%": 0.010080906132506668, "calibration/coverage@10%": 0.010080906132506668, "calibration/coverage@15%": 0.039589102853818135, "calibration/coverage@20%": 0.039589102853818135, "calibration/coverage@25%": 0.189862326897534, "calibration/coverage@30%": 0.4886187270899134, "calibration/coverage@5%": 0.010080906132506668, "calibration/ece": 0.18590618469068998, "calibration/mean_confidence": 0.83246762819597, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01180555555555558, "completions/max_length": 4029.6, "completions/max_terminated_length": 4029.6, "completions/mean_length": 553.0240539550781, "completions/mean_terminated_length": 559.6217895507813, "completions/min_length": 0.0, "completions/min_terminated_length": 122.6, "epoch": 0.07199910001124986, "grad_norm": 0.000492545310407877, "learning_rate": 3.5714285714285718e-06, "loss": -0.0069, "num_tokens": 51613231.0, "reward": 0.9050763845443726, "reward_std": 0.19376931786537172, "rewards/accuracy_reward": 0.5701388835906982, "rewards/brier_reward": 0.6859318375587463, "rewards/confidence_uniqueness_reward": 0.5833416938781738, "rewards/format_reward": 0.9861979246139526, "rewards/frontier_coverage_0": -0.00019347216002643108, "rewards/frontier_coverage_1": -0.00019347216002643108, "rewards/frontier_coverage_10": -0.00019347216002643108, "rewards/frontier_coverage_15": -0.00019347216002643108, "rewards/frontier_coverage_20": -0.00019347216002643108, "rewards/frontier_coverage_25": -0.00019347216002643108, "rewards/frontier_coverage_5": -0.00019347216002643108, "signal/accuracy_reward/centered_abs_mean": 0.2508246570825577, "signal/accuracy_reward/group_std_mean": 0.31349337100982666, "signal/accuracy_reward/group_zero_std_frac": 0.1694444462656975, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12541232854127884, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.12541232854127884, "signal/advantage_abs_mean": 0.15153846591711045, "signal/advantage_pre_scale_abs_mean": 0.15153846591711045, "signal/advantage_pre_scale_std": 0.2147096276283264, "signal/advantage_std": 0.2147096276283264, "signal/brier_reward/centered_abs_mean": 0.18513798117637634, "signal/brier_reward/group_std_mean": 0.23217344880104065, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018513799458742142, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018513799458742142, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.20066949725151062, "signal/confidence_uniqueness_reward/group_std_mean": 0.23112341463565828, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02006694972515106, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02006694972515106, "signal/format_reward/centered_abs_mean": 0.02318250872194767, "signal/format_reward/group_std_mean": 0.04461696371436119, "signal/format_reward/group_zero_std_frac": 0.8111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011591254360973835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011591254360973835, "signal/frontier_coverage_0/centered_abs_mean": 0.03151394948363304, "signal/frontier_coverage_0/group_std_mean": 0.04832508638501167, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0004506495199166238, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0004506495199166238, "signal/frontier_coverage_1/centered_abs_mean": 0.03151394948363304, "signal/frontier_coverage_1/group_std_mean": 0.04832508638501167, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0004506495199166238, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0004506495199166238, "signal/frontier_coverage_10/centered_abs_mean": 0.03151394948363304, "signal/frontier_coverage_10/group_std_mean": 0.04832508638501167, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0004506495199166238, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0004506495199166238, "signal/frontier_coverage_15/centered_abs_mean": 0.03151394948363304, "signal/frontier_coverage_15/group_std_mean": 0.04832508638501167, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0004506495199166238, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0004506495199166238, "signal/frontier_coverage_20/centered_abs_mean": 0.03151394948363304, "signal/frontier_coverage_20/group_std_mean": 0.04832508638501167, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0004506495199166238, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0004506495199166238, "signal/frontier_coverage_25/centered_abs_mean": 0.03151394948363304, "signal/frontier_coverage_25/group_std_mean": 0.04832508638501167, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0004506495199166238, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0004506495199166238, "signal/frontier_coverage_5/centered_abs_mean": 0.03151394948363304, "signal/frontier_coverage_5/group_std_mean": 0.04832508638501167, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0004506495199166238, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0004506495199166238, "step": 30 }, { "calibration/aurc": 0.2681236379901537, "calibration/batch_distribution_entropy": 0.6073690581770046, "calibration/buffer_distribution_entropy": 0.4477840560637281, "calibration/confidence_entropy": 0.4926214392193963, "calibration/coverage@0%": 0.005292996092884997, "calibration/coverage@1%": 0.005292996092884997, "calibration/coverage@10%": 0.01880650960639851, "calibration/coverage@15%": 0.06538064730050323, "calibration/coverage@20%": 0.12169978127767771, "calibration/coverage@25%": 0.48489876034564067, "calibration/coverage@30%": 0.8208872967759714, "calibration/coverage@5%": 0.005292996092884997, "calibration/ece": 0.10877696174454979, "calibration/mean_confidence": 0.7835651647507251, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4059.6, "completions/max_terminated_length": 4059.6, "completions/mean_length": 624.3829956054688, "completions/mean_terminated_length": 634.435595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 166.4, "epoch": 0.08399895001312484, "grad_norm": 0.0005502038984559476, "learning_rate": 4.166666666666667e-06, "loss": -0.0098, "num_tokens": 61883563.0, "reward": 0.9427950978279114, "reward_std": 0.16786060631275176, "rewards/accuracy_reward": 0.6350694537162781, "rewards/brier_reward": 0.7387005448341369, "rewards/confidence_uniqueness_reward": 0.6191936731338501, "rewards/format_reward": 0.9811631917953492, "rewards/frontier_coverage_0": -0.011095415393356234, "rewards/frontier_coverage_1": -0.011095415393356234, "rewards/frontier_coverage_10": -0.011095415393356234, "rewards/frontier_coverage_15": -0.011095415393356234, "rewards/frontier_coverage_20": -0.011095415393356234, "rewards/frontier_coverage_25": -0.011095415393356234, "rewards/frontier_coverage_5": -0.011095415393356234, "signal/accuracy_reward/centered_abs_mean": 0.20149739682674409, "signal/accuracy_reward/group_std_mean": 0.260405895113945, "signal/accuracy_reward/group_zero_std_frac": 0.2833333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10074869841337204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10074869841337204, "signal/advantage_abs_mean": 0.1255136936903, "signal/advantage_pre_scale_abs_mean": 0.1255136936903, "signal/advantage_pre_scale_std": 0.1959227830171585, "signal/advantage_std": 0.1959227830171585, "signal/brier_reward/centered_abs_mean": 0.1406739756464958, "signal/brier_reward/group_std_mean": 0.1840929538011551, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014067397452890873, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014067397452890873, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.14535255879163742, "signal/confidence_uniqueness_reward/group_std_mean": 0.17814411520957946, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01453525610268116, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01453525610268116, "signal/format_reward/centered_abs_mean": 0.02883572019636631, "signal/format_reward/group_std_mean": 0.05311768278479576, "signal/format_reward/group_zero_std_frac": 0.7833333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014417860098183155, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014417860098183155, "signal/frontier_coverage_0/centered_abs_mean": 0.04025139883160591, "signal/frontier_coverage_0/group_std_mean": 0.0570778988301754, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0005755949881859123, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0005755949881859123, "signal/frontier_coverage_1/centered_abs_mean": 0.04025139883160591, "signal/frontier_coverage_1/group_std_mean": 0.0570778988301754, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0005755949881859123, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0005755949881859123, "signal/frontier_coverage_10/centered_abs_mean": 0.04025139883160591, "signal/frontier_coverage_10/group_std_mean": 0.0570778988301754, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0005755949881859123, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0005755949881859123, "signal/frontier_coverage_15/centered_abs_mean": 0.04025139883160591, "signal/frontier_coverage_15/group_std_mean": 0.0570778988301754, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0005755949881859123, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0005755949881859123, "signal/frontier_coverage_20/centered_abs_mean": 0.04025139883160591, "signal/frontier_coverage_20/group_std_mean": 0.0570778988301754, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005755949881859123, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005755949881859123, "signal/frontier_coverage_25/centered_abs_mean": 0.04025139883160591, "signal/frontier_coverage_25/group_std_mean": 0.0570778988301754, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005755949881859123, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005755949881859123, "signal/frontier_coverage_5/centered_abs_mean": 0.04025139883160591, "signal/frontier_coverage_5/group_std_mean": 0.0570778988301754, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0005755949881859123, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0005755949881859123, "step": 35 }, { "calibration/aurc": 0.30111796135846025, "calibration/batch_distribution_entropy": 0.7053583205644779, "calibration/buffer_distribution_entropy": 0.5234970614904426, "calibration/confidence_entropy": 0.5460232422366482, "calibration/coverage@0%": 0.024122536032217527, "calibration/coverage@1%": 0.024122536032217527, "calibration/coverage@10%": 0.028311017707610197, "calibration/coverage@15%": 0.14754387433618138, "calibration/coverage@20%": 0.1898415244667297, "calibration/coverage@25%": 0.2871458225715403, "calibration/coverage@30%": 0.4155797006469385, "calibration/coverage@5%": 0.028311017707610197, "calibration/ece": 0.10965484584656611, "calibration/mean_confidence": 0.7238385058092655, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014149305555555557, "completions/max_length": 3896.8, "completions/max_terminated_length": 3896.8, "completions/mean_length": 662.5091186523438, "completions/mean_terminated_length": 671.992822265625, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.09599880001499982, "grad_norm": 0.00039204536005854607, "learning_rate": 4.761904761904762e-06, "loss": -0.0107, "num_tokens": 72635188.0, "reward": 0.9650038480758667, "reward_std": 0.15621234178543092, "rewards/accuracy_reward": 0.6572048544883728, "rewards/brier_reward": 0.7644977688789367, "rewards/confidence_uniqueness_reward": 0.6965551018714905, "rewards/format_reward": 0.9844617962837219, "rewards/frontier_coverage_0": -0.01932877181097865, "rewards/frontier_coverage_1": -0.01932877181097865, "rewards/frontier_coverage_10": -0.01932877181097865, "rewards/frontier_coverage_15": -0.01932877181097865, "rewards/frontier_coverage_20": -0.01932877181097865, "rewards/frontier_coverage_25": -0.01932877181097865, "rewards/frontier_coverage_5": -0.01932877181097865, "signal/accuracy_reward/centered_abs_mean": 0.18748372197151184, "signal/accuracy_reward/group_std_mean": 0.24760319292545319, "signal/accuracy_reward/group_zero_std_frac": 0.3000000029802322, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09374186098575592, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09374186098575592, "signal/advantage_abs_mean": 0.11463638693094254, "signal/advantage_pre_scale_abs_mean": 0.11463638693094254, "signal/advantage_pre_scale_std": 0.18725805282592772, "signal/advantage_std": 0.18725805282592772, "signal/brier_reward/centered_abs_mean": 0.1293620839715004, "signal/brier_reward/group_std_mean": 0.16848357319831847, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012936208583414554, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012936208583414554, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.14036289900541304, "signal/confidence_uniqueness_reward/group_std_mean": 0.16973767578601837, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014036289602518081, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014036289602518081, "signal/format_reward/centered_abs_mean": 0.02676323764026165, "signal/format_reward/group_std_mean": 0.050653649121522905, "signal/format_reward/group_zero_std_frac": 0.7916666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013381618820130826, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013381618820130826, "signal/frontier_coverage_0/centered_abs_mean": 0.07164318114519119, "signal/frontier_coverage_0/group_std_mean": 0.1008858099579811, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0010244975332170726, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0010244975332170726, "signal/frontier_coverage_1/centered_abs_mean": 0.07164318114519119, "signal/frontier_coverage_1/group_std_mean": 0.1008858099579811, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0010244975332170726, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0010244975332170726, "signal/frontier_coverage_10/centered_abs_mean": 0.07164318114519119, "signal/frontier_coverage_10/group_std_mean": 0.1008858099579811, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0010244975332170726, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010244975332170726, "signal/frontier_coverage_15/centered_abs_mean": 0.07164318114519119, "signal/frontier_coverage_15/group_std_mean": 0.1008858099579811, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010244975332170726, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010244975332170726, "signal/frontier_coverage_20/centered_abs_mean": 0.07164318114519119, "signal/frontier_coverage_20/group_std_mean": 0.1008858099579811, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010244975332170726, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010244975332170726, "signal/frontier_coverage_25/centered_abs_mean": 0.07164318114519119, "signal/frontier_coverage_25/group_std_mean": 0.1008858099579811, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010244975332170726, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010244975332170726, "signal/frontier_coverage_5/centered_abs_mean": 0.07164318114519119, "signal/frontier_coverage_5/group_std_mean": 0.1008858099579811, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0010244975332170726, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0010244975332170726, "step": 40 }, { "calibration/aurc": 0.20395504840548478, "calibration/batch_distribution_entropy": 0.8372735413533098, "calibration/buffer_distribution_entropy": 0.5938288992808299, "calibration/confidence_entropy": 0.5149116414916268, "calibration/coverage@0%": 0.01894778067656185, "calibration/coverage@1%": 0.01894778067656185, "calibration/coverage@10%": 0.09860851646372701, "calibration/coverage@15%": 0.11114115353944505, "calibration/coverage@20%": 0.4856548179675456, "calibration/coverage@25%": 0.8623658713733823, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.030958224540791612, "calibration/ece": 0.12145240512186213, "calibration/mean_confidence": 0.686226908594128, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01293402777777779, "completions/max_length": 3836.2, "completions/max_terminated_length": 3836.2, "completions/mean_length": 695.34775390625, "completions/mean_terminated_length": 704.525, "completions/min_length": 0.0, "completions/min_terminated_length": 202.8, "epoch": 0.1079986500168748, "grad_norm": 0.0004304039175622165, "learning_rate": 4.909638554216868e-06, "loss": -0.0104, "num_tokens": 83780858.0, "reward": 0.9816272854804993, "reward_std": 0.1476329445838928, "rewards/accuracy_reward": 0.6620659947395324, "rewards/brier_reward": 0.7589836120605469, "rewards/confidence_uniqueness_reward": 0.8408130168914795, "rewards/format_reward": 0.9857638835906982, "rewards/frontier_coverage_0": -0.022650658898055554, "rewards/frontier_coverage_1": -0.022650658898055554, "rewards/frontier_coverage_10": -0.022650658898055554, "rewards/frontier_coverage_15": -0.022650658898055554, "rewards/frontier_coverage_20": -0.022650658898055554, "rewards/frontier_coverage_25": -0.022650658898055554, "rewards/frontier_coverage_5": -0.022650658898055554, "signal/accuracy_reward/centered_abs_mean": 0.18065863847732544, "signal/accuracy_reward/group_std_mean": 0.23835379481315613, "signal/accuracy_reward/group_zero_std_frac": 0.3194444507360458, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09032931923866272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09032931923866272, "signal/advantage_abs_mean": 0.10960723757743836, "signal/advantage_pre_scale_abs_mean": 0.10960723757743836, "signal/advantage_pre_scale_std": 0.17717336416244506, "signal/advantage_std": 0.17717336416244506, "signal/brier_reward/centered_abs_mean": 0.15332198441028594, "signal/brier_reward/group_std_mean": 0.19752687215805054, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015332199074327946, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015332199074327946, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09375370144844056, "signal/confidence_uniqueness_reward/group_std_mean": 0.11937893778085709, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009375370014458895, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009375370014458895, "signal/format_reward/centered_abs_mean": 0.02317708320915699, "signal/format_reward/group_std_mean": 0.040333667397499086, "signal/format_reward/group_zero_std_frac": 0.8444444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011588541604578496, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011588541604578496, "signal/frontier_coverage_0/centered_abs_mean": 0.11648316830396652, "signal/frontier_coverage_0/group_std_mean": 0.1628135621547699, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016657092841342092, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016657092841342092, "signal/frontier_coverage_1/centered_abs_mean": 0.11648316830396652, "signal/frontier_coverage_1/group_std_mean": 0.1628135621547699, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016657092841342092, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016657092841342092, "signal/frontier_coverage_10/centered_abs_mean": 0.11648316830396652, "signal/frontier_coverage_10/group_std_mean": 0.1628135621547699, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016657092841342092, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016657092841342092, "signal/frontier_coverage_15/centered_abs_mean": 0.11648316830396652, "signal/frontier_coverage_15/group_std_mean": 0.1628135621547699, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016657092841342092, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016657092841342092, "signal/frontier_coverage_20/centered_abs_mean": 0.11648316830396652, "signal/frontier_coverage_20/group_std_mean": 0.1628135621547699, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016657092841342092, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016657092841342092, "signal/frontier_coverage_25/centered_abs_mean": 0.11648316830396652, "signal/frontier_coverage_25/group_std_mean": 0.1628135621547699, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016657092841342092, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016657092841342092, "signal/frontier_coverage_5/centered_abs_mean": 0.11648316830396652, "signal/frontier_coverage_5/group_std_mean": 0.1628135621547699, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016657092841342092, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016657092841342092, "step": 45 }, { "calibration/aurc": 0.40004916389399164, "calibration/batch_distribution_entropy": 0.8503065773009097, "calibration/buffer_distribution_entropy": 0.6432089511173741, "calibration/confidence_entropy": 0.4667162645346662, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.015384615384615385, "calibration/coverage@20%": 0.019628647214854113, "calibration/coverage@25%": 0.08075988115602772, "calibration/coverage@30%": 0.3064657805788464, "calibration/coverage@5%": 0.0, "calibration/ece": 0.24841259649370925, "calibration/mean_confidence": 0.7116104147978586, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00972222222222221, "completions/max_length": 3621.4, "completions/max_terminated_length": 3621.4, "completions/mean_length": 705.259033203125, "completions/mean_terminated_length": 712.1700073242188, "completions/min_length": 0.0, "completions/min_terminated_length": 183.4, "epoch": 0.11999850001874976, "grad_norm": 0.000493047118652612, "learning_rate": 4.759036144578314e-06, "loss": -0.0076, "num_tokens": 95003042.0, "reward": 0.979445469379425, "reward_std": 0.14116989970207214, "rewards/accuracy_reward": 0.6446180582046509, "rewards/brier_reward": 0.7461867094039917, "rewards/confidence_uniqueness_reward": 0.8847499370574952, "rewards/format_reward": 0.98984375, "rewards/frontier_coverage_0": -0.008782240888103842, "rewards/frontier_coverage_1": -0.008782240888103842, "rewards/frontier_coverage_10": -0.008782240888103842, "rewards/frontier_coverage_15": -0.008782240888103842, "rewards/frontier_coverage_20": -0.008782240888103842, "rewards/frontier_coverage_25": -0.008782240888103842, "rewards/frontier_coverage_5": -0.008782240888103842, "signal/accuracy_reward/centered_abs_mean": 0.1734266459941864, "signal/accuracy_reward/group_std_mean": 0.22699449956417084, "signal/accuracy_reward/group_zero_std_frac": 0.3638888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0867133229970932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0867133229970932, "signal/advantage_abs_mean": 0.10538902878761292, "signal/advantage_pre_scale_abs_mean": 0.10538902878761292, "signal/advantage_pre_scale_std": 0.17160050570964813, "signal/advantage_std": 0.17160050570964813, "signal/brier_reward/centered_abs_mean": 0.17446674704551696, "signal/brier_reward/group_std_mean": 0.22122922539710999, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017446675151586533, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017446675151586533, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07939892560243607, "signal/confidence_uniqueness_reward/group_std_mean": 0.10281916856765747, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007939892914146185, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007939892914146185, "signal/format_reward/centered_abs_mean": 0.017041015811264514, "signal/format_reward/group_std_mean": 0.03274031579494476, "signal/format_reward/group_zero_std_frac": 0.8611111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008520507905632257, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008520507905632257, "signal/frontier_coverage_0/centered_abs_mean": 0.1188867524266243, "signal/frontier_coverage_0/group_std_mean": 0.17139540314674379, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017000806052237748, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017000806052237748, "signal/frontier_coverage_1/centered_abs_mean": 0.1188867524266243, "signal/frontier_coverage_1/group_std_mean": 0.17139540314674379, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017000806052237748, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017000806052237748, "signal/frontier_coverage_10/centered_abs_mean": 0.1188867524266243, "signal/frontier_coverage_10/group_std_mean": 0.17139540314674379, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017000806052237748, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017000806052237748, "signal/frontier_coverage_15/centered_abs_mean": 0.1188867524266243, "signal/frontier_coverage_15/group_std_mean": 0.17139540314674379, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017000806052237748, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017000806052237748, "signal/frontier_coverage_20/centered_abs_mean": 0.1188867524266243, "signal/frontier_coverage_20/group_std_mean": 0.17139540314674379, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017000806052237748, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017000806052237748, "signal/frontier_coverage_25/centered_abs_mean": 0.1188867524266243, "signal/frontier_coverage_25/group_std_mean": 0.17139540314674379, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017000806052237748, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017000806052237748, "signal/frontier_coverage_5/centered_abs_mean": 0.1188867524266243, "signal/frontier_coverage_5/group_std_mean": 0.17139540314674379, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017000806052237748, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017000806052237748, "step": 50 }, { "epoch": 0.11999850001874976, "eval_calibration/aurc": 0.27069268800775687, "eval_calibration/batch_distribution_entropy": 0.7597488500774766, "eval_calibration/buffer_distribution_entropy": 0.665248508467892, "eval_calibration/confidence_entropy": 0.41376028637381856, "eval_calibration/coverage@0%": 0.057291666666666664, "eval_calibration/coverage@1%": 0.057291666666666664, "eval_calibration/coverage@10%": 0.057291666666666664, "eval_calibration/coverage@15%": 0.15625, "eval_calibration/coverage@20%": 0.3385416666666667, "eval_calibration/coverage@25%": 0.5, "eval_calibration/coverage@30%": 0.7708333333333334, "eval_calibration/coverage@5%": 0.057291666666666664, "eval_calibration/ece": 0.23475069231304369, "eval_calibration/mean_confidence": 0.7354858947273533, "eval_completions/clipped_ratio": 0.006076388888888895, "eval_completions/max_length": 2445.5, "eval_completions/max_terminated_length": 2445.5, "eval_completions/mean_length": 694.704345703125, "eval_completions/mean_terminated_length": 699.0004577636719, "eval_completions/min_length": 85.33333333333333, "eval_completions/min_terminated_length": 241.0, "eval_loss": 0.0, "eval_num_tokens": 95003042.0, "eval_reward": 0.9741584062576294, "eval_reward_std": 0.2603639264901479, "eval_rewards/accuracy_reward": 0.6388889054457346, "eval_rewards/brier_reward": 0.7358029286066691, "eval_rewards/confidence_uniqueness_reward": 0.8596515456835429, "eval_rewards/format_reward": 0.9930555621782938, "eval_rewards/frontier_coverage_0": -0.013578996993601322, "eval_rewards/frontier_coverage_1": -0.013578996993601322, "eval_rewards/frontier_coverage_10": -0.013578996993601322, "eval_rewards/frontier_coverage_15": -0.013578996993601322, "eval_rewards/frontier_coverage_20": -0.013578996993601322, "eval_rewards/frontier_coverage_25": -0.013578996993601322, "eval_rewards/frontier_coverage_5": -0.013578996993601322, "eval_runtime": 203.1998, "eval_samples_per_second": 4.921, "eval_signal/accuracy_reward/centered_abs_mean": 0.4524739583333333, "eval_signal/accuracy_reward/group_std_mean": 0.4828086843093236, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22623697916666666, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22623697916666666, "eval_signal/advantage_abs_mean": 0.23652214308579764, "eval_signal/advantage_pre_scale_abs_mean": 0.23652214308579764, "eval_signal/advantage_pre_scale_std": 0.2581101755301158, "eval_signal/advantage_std": 0.2581101755301158, "eval_signal/brier_reward/centered_abs_mean": 0.24942312637964884, "eval_signal/brier_reward/group_std_mean": 0.3024876117706299, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024942313010493915, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.024942313010493915, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06988021731376648, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09675693760315578, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00698802216599385, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00698802216599385, "eval_signal/format_reward/centered_abs_mean": 0.013454860852410397, "eval_signal/format_reward/group_std_mean": 0.03928370991100868, "eval_signal/format_reward/group_zero_std_frac": 0.7777778009573618, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.006727430426205198, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.006727430426205198, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.14178907995422682, "eval_signal/frontier_coverage_0/group_std_mean": 0.22794127960999808, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002027583793581774, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002027583793581774, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.14178907995422682, "eval_signal/frontier_coverage_1/group_std_mean": 0.22794127960999808, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002027583793581774, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002027583793581774, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.14178907995422682, "eval_signal/frontier_coverage_10/group_std_mean": 0.22794127960999808, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002027583793581774, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002027583793581774, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.14178907995422682, "eval_signal/frontier_coverage_15/group_std_mean": 0.22794127960999808, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002027583793581774, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002027583793581774, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.14178907995422682, "eval_signal/frontier_coverage_20/group_std_mean": 0.22794127960999808, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002027583793581774, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002027583793581774, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.14178907995422682, "eval_signal/frontier_coverage_25/group_std_mean": 0.22794127960999808, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002027583793581774, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002027583793581774, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.14178907995422682, "eval_signal/frontier_coverage_5/group_std_mean": 0.22794127960999808, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002027583793581774, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002027583793581774, "eval_steps_per_second": 0.03, "step": 50 }, { "calibration/aurc": 0.2623736134960931, "calibration/batch_distribution_entropy": 0.8514919861126554, "calibration/buffer_distribution_entropy": 0.6764847742047964, "calibration/confidence_entropy": 0.44676430730968664, "calibration/coverage@0%": 0.002617801047120419, "calibration/coverage@1%": 0.002617801047120419, "calibration/coverage@10%": 0.13036649214659687, "calibration/coverage@15%": 0.22303664921465968, "calibration/coverage@20%": 0.2418848167539267, "calibration/coverage@25%": 0.5089005235602094, "calibration/coverage@30%": 0.737714654822215, "calibration/coverage@5%": 0.038743455497382194, "calibration/ece": 0.15346165735644177, "calibration/mean_confidence": 0.7219380418125023, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012152777777777768, "completions/max_length": 3167.8, "completions/max_terminated_length": 3167.8, "completions/mean_length": 718.0534912109375, "completions/mean_terminated_length": 726.9828491210938, "completions/min_length": 0.0, "completions/min_terminated_length": 155.2, "epoch": 0.13199835002062474, "grad_norm": 0.00041744497139006853, "learning_rate": 4.60843373493976e-06, "loss": -0.0089, "num_tokens": 106355594.0, "reward": 0.9921215295791626, "reward_std": 0.13974846601486207, "rewards/accuracy_reward": 0.66328125, "rewards/brier_reward": 0.7599790453910827, "rewards/confidence_uniqueness_reward": 0.9193215608596802, "rewards/format_reward": 0.9871527791023255, "rewards/frontier_coverage_0": -0.010245506907813251, "rewards/frontier_coverage_1": -0.010245506907813251, "rewards/frontier_coverage_10": -0.010245506907813251, "rewards/frontier_coverage_15": -0.010245506907813251, "rewards/frontier_coverage_20": -0.010245506907813251, "rewards/frontier_coverage_25": -0.010245506907813251, "rewards/frontier_coverage_5": -0.010245506907813251, "signal/accuracy_reward/centered_abs_mean": 0.17350803017616273, "signal/accuracy_reward/group_std_mean": 0.22752963304519652, "signal/accuracy_reward/group_zero_std_frac": 0.3583333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08675401508808137, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08675401508808137, "signal/advantage_abs_mean": 0.10408687144517899, "signal/advantage_pre_scale_abs_mean": 0.10408687144517899, "signal/advantage_pre_scale_std": 0.17095798552036284, "signal/advantage_std": 0.17095798552036284, "signal/brier_reward/centered_abs_mean": 0.16589346528053284, "signal/brier_reward/group_std_mean": 0.2125259518623352, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016589346528053283, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016589346528053283, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04859142899513245, "signal/confidence_uniqueness_reward/group_std_mean": 0.07001910582184792, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004859142657369375, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004859142657369375, "signal/format_reward/centered_abs_mean": 0.01928168386220932, "signal/format_reward/group_std_mean": 0.03471194803714752, "signal/format_reward/group_zero_std_frac": 0.8638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00964084193110466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00964084193110466, "signal/frontier_coverage_0/centered_abs_mean": 0.13067235350608825, "signal/frontier_coverage_0/group_std_mean": 0.1851820766925812, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018686146708205343, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018686146708205343, "signal/frontier_coverage_1/centered_abs_mean": 0.13067235350608825, "signal/frontier_coverage_1/group_std_mean": 0.1851820766925812, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018686146708205343, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018686146708205343, "signal/frontier_coverage_10/centered_abs_mean": 0.13067235350608825, "signal/frontier_coverage_10/group_std_mean": 0.1851820766925812, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018686146708205343, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018686146708205343, "signal/frontier_coverage_15/centered_abs_mean": 0.13067235350608825, "signal/frontier_coverage_15/group_std_mean": 0.1851820766925812, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018686146708205343, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018686146708205343, "signal/frontier_coverage_20/centered_abs_mean": 0.13067235350608825, "signal/frontier_coverage_20/group_std_mean": 0.1851820766925812, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018686146708205343, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018686146708205343, "signal/frontier_coverage_25/centered_abs_mean": 0.13067235350608825, "signal/frontier_coverage_25/group_std_mean": 0.1851820766925812, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018686146708205343, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018686146708205343, "signal/frontier_coverage_5/centered_abs_mean": 0.13067235350608825, "signal/frontier_coverage_5/group_std_mean": 0.1851820766925812, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018686146708205343, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018686146708205343, "step": 55 }, { "calibration/aurc": 0.322583325206245, "calibration/batch_distribution_entropy": 0.8352429768114273, "calibration/buffer_distribution_entropy": 0.6996665514037949, "calibration/confidence_entropy": 0.4572730513075878, "calibration/coverage@0%": 0.005208333333333334, "calibration/coverage@1%": 0.005208333333333334, "calibration/coverage@10%": 0.16354166666666667, "calibration/coverage@15%": 0.29375, "calibration/coverage@20%": 0.359375, "calibration/coverage@25%": 0.3921875, "calibration/coverage@30%": 0.45691489361702126, "calibration/coverage@5%": 0.005208333333333334, "calibration/ece": 0.1953935735919945, "calibration/mean_confidence": 0.72532795217994, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009201388888888884, "completions/max_length": 3781.6, "completions/max_terminated_length": 3781.6, "completions/mean_length": 738.7850830078125, "completions/mean_terminated_length": 745.7328125, "completions/min_length": 0.0, "completions/min_terminated_length": 195.8, "epoch": 0.14399820002249972, "grad_norm": 0.0004830217803828418, "learning_rate": 4.457831325301205e-06, "loss": -0.0079, "num_tokens": 117962974.0, "reward": 0.9810348987579346, "reward_std": 0.1425999477505684, "rewards/accuracy_reward": 0.6373263835906983, "rewards/brier_reward": 0.7495613336563111, "rewards/confidence_uniqueness_reward": 0.9255987882614136, "rewards/format_reward": 0.9904513955116272, "rewards/frontier_coverage_0": -0.0036967315711081026, "rewards/frontier_coverage_1": -0.0036967315711081026, "rewards/frontier_coverage_10": -0.0036967315711081026, "rewards/frontier_coverage_15": -0.0036967315711081026, "rewards/frontier_coverage_20": -0.0036967315711081026, "rewards/frontier_coverage_25": -0.0036967315711081026, "rewards/frontier_coverage_5": -0.0036967315711081026, "signal/accuracy_reward/centered_abs_mean": 0.18495008647441863, "signal/accuracy_reward/group_std_mean": 0.238821542263031, "signal/accuracy_reward/group_zero_std_frac": 0.3361111104488373, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09247504323720931, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09247504323720931, "signal/advantage_abs_mean": 0.10767804533243179, "signal/advantage_pre_scale_abs_mean": 0.10767804533243179, "signal/advantage_pre_scale_std": 0.1729002594947815, "signal/advantage_std": 0.1729002594947815, "signal/brier_reward/centered_abs_mean": 0.16945191323757172, "signal/brier_reward/group_std_mean": 0.21429203748703002, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016945191845297813, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016945191845297813, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04097889587283134, "signal/confidence_uniqueness_reward/group_std_mean": 0.061232827603816986, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004097889456897974, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004097889456897974, "signal/format_reward/centered_abs_mean": 0.01637369776144624, "signal/format_reward/group_std_mean": 0.031473302841186525, "signal/format_reward/group_zero_std_frac": 0.8666666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00818684888072312, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00818684888072312, "signal/frontier_coverage_0/centered_abs_mean": 0.12938774824142457, "signal/frontier_coverage_0/group_std_mean": 0.18174587488174437, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018502447521314025, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018502447521314025, "signal/frontier_coverage_1/centered_abs_mean": 0.12938774824142457, "signal/frontier_coverage_1/group_std_mean": 0.18174587488174437, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018502447521314025, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018502447521314025, "signal/frontier_coverage_10/centered_abs_mean": 0.12938774824142457, "signal/frontier_coverage_10/group_std_mean": 0.18174587488174437, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018502447521314025, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018502447521314025, "signal/frontier_coverage_15/centered_abs_mean": 0.12938774824142457, "signal/frontier_coverage_15/group_std_mean": 0.18174587488174437, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018502447521314025, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018502447521314025, "signal/frontier_coverage_20/centered_abs_mean": 0.12938774824142457, "signal/frontier_coverage_20/group_std_mean": 0.18174587488174437, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018502447521314025, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018502447521314025, "signal/frontier_coverage_25/centered_abs_mean": 0.12938774824142457, "signal/frontier_coverage_25/group_std_mean": 0.18174587488174437, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018502447521314025, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018502447521314025, "signal/frontier_coverage_5/centered_abs_mean": 0.12938774824142457, "signal/frontier_coverage_5/group_std_mean": 0.18174587488174437, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018502447521314025, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018502447521314025, "step": 60 }, { "calibration/aurc": 0.2320311524237611, "calibration/batch_distribution_entropy": 0.8156248009958293, "calibration/buffer_distribution_entropy": 0.7178334508271775, "calibration/confidence_entropy": 0.44483765410335996, "calibration/coverage@0%": 0.01632690638939771, "calibration/coverage@1%": 0.01632690638939771, "calibration/coverage@10%": 0.28226071766560934, "calibration/coverage@15%": 0.4698960721302671, "calibration/coverage@20%": 0.5423117811753071, "calibration/coverage@25%": 0.5948163812308549, "calibration/coverage@30%": 0.6710554163185741, "calibration/coverage@5%": 0.04632690638939771, "calibration/ece": 0.11255049437942628, "calibration/mean_confidence": 0.7353874331667388, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008680555555555535, "completions/max_length": 3591.8, "completions/max_terminated_length": 3591.8, "completions/mean_length": 724.928466796875, "completions/mean_terminated_length": 731.31708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 197.2, "epoch": 0.1559980500243747, "grad_norm": 0.0005269707180559635, "learning_rate": 4.307228915662651e-06, "loss": -0.0064, "num_tokens": 129408198.0, "reward": 1.003051507472992, "reward_std": 0.12487713843584061, "rewards/accuracy_reward": 0.6729166626930236, "rewards/brier_reward": 0.7835439920425415, "rewards/confidence_uniqueness_reward": 0.9210333466529846, "rewards/format_reward": 0.9910590291023255, "rewards/frontier_coverage_0": 0.006052964180707931, "rewards/frontier_coverage_1": 0.006052964180707931, "rewards/frontier_coverage_10": 0.006052964180707931, "rewards/frontier_coverage_15": 0.006052964180707931, "rewards/frontier_coverage_20": 0.006052964180707931, "rewards/frontier_coverage_25": 0.006052964180707931, "rewards/frontier_coverage_5": 0.006052964180707931, "signal/accuracy_reward/centered_abs_mean": 0.15330946147441865, "signal/accuracy_reward/group_std_mean": 0.20192310214042664, "signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07665473073720933, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07665473073720933, "signal/advantage_abs_mean": 0.09188215732574463, "signal/advantage_pre_scale_abs_mean": 0.09188215732574463, "signal/advantage_pre_scale_std": 0.16062064170837403, "signal/advantage_std": 0.16062064170837403, "signal/brier_reward/centered_abs_mean": 0.14177187383174897, "signal/brier_reward/group_std_mean": 0.18185266852378845, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014177187345921993, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014177187345921993, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04180399999022484, "signal/confidence_uniqueness_reward/group_std_mean": 0.06167575493454933, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004180399980396032, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004180399980396032, "signal/format_reward/centered_abs_mean": 0.015771484375, "signal/format_reward/group_std_mean": 0.030386429652571677, "signal/format_reward/group_zero_std_frac": 0.8750000238418579, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0078857421875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0078857421875, "signal/frontier_coverage_0/centered_abs_mean": 0.09885098785161972, "signal/frontier_coverage_0/group_std_mean": 0.13911318629980088, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001413569157011807, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001413569157011807, "signal/frontier_coverage_1/centered_abs_mean": 0.09885098785161972, "signal/frontier_coverage_1/group_std_mean": 0.13911318629980088, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001413569157011807, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001413569157011807, "signal/frontier_coverage_10/centered_abs_mean": 0.09885098785161972, "signal/frontier_coverage_10/group_std_mean": 0.13911318629980088, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001413569157011807, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001413569157011807, "signal/frontier_coverage_15/centered_abs_mean": 0.09885098785161972, "signal/frontier_coverage_15/group_std_mean": 0.13911318629980088, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001413569157011807, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001413569157011807, "signal/frontier_coverage_20/centered_abs_mean": 0.09885098785161972, "signal/frontier_coverage_20/group_std_mean": 0.13911318629980088, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001413569157011807, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001413569157011807, "signal/frontier_coverage_25/centered_abs_mean": 0.09885098785161972, "signal/frontier_coverage_25/group_std_mean": 0.13911318629980088, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001413569157011807, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001413569157011807, "signal/frontier_coverage_5/centered_abs_mean": 0.09885098785161972, "signal/frontier_coverage_5/group_std_mean": 0.13911318629980088, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001413569157011807, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001413569157011807, "step": 65 }, { "calibration/aurc": 0.30146087691874146, "calibration/batch_distribution_entropy": 0.8401200097364473, "calibration/buffer_distribution_entropy": 0.7292469367397179, "calibration/confidence_entropy": 0.4543341177595561, "calibration/coverage@0%": 0.006362346830231948, "calibration/coverage@1%": 0.006362346830231948, "calibration/coverage@10%": 0.07782901349689861, "calibration/coverage@15%": 0.1092956801635653, "calibration/coverage@20%": 0.16031743821404396, "calibration/coverage@25%": 0.3165103015734869, "calibration/coverage@30%": 0.3856639134098664, "calibration/coverage@5%": 0.058095680163565276, "calibration/ece": 0.1530902409412818, "calibration/mean_confidence": 0.723583873833328, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010243055555555557, "completions/max_length": 3282.6, "completions/max_terminated_length": 3282.6, "completions/mean_length": 718.2440185546875, "completions/mean_terminated_length": 725.6932373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 167.4, "epoch": 0.16799790002624967, "grad_norm": 0.0005004884442314506, "learning_rate": 4.156626506024097e-06, "loss": -0.0088, "num_tokens": 140760513.0, "reward": 0.9935579299926758, "reward_std": 0.12153150737285615, "rewards/accuracy_reward": 0.6536458373069763, "rewards/brier_reward": 0.7778854846954346, "rewards/confidence_uniqueness_reward": 0.9321373701095581, "rewards/format_reward": 0.9896701335906982, "rewards/frontier_coverage_0": 0.008967609610408545, "rewards/frontier_coverage_1": 0.008967609610408545, "rewards/frontier_coverage_10": 0.008967609610408545, "rewards/frontier_coverage_15": 0.008967609610408545, "rewards/frontier_coverage_20": 0.008967609610408545, "rewards/frontier_coverage_25": 0.008967609610408545, "rewards/frontier_coverage_5": 0.008967609610408545, "signal/accuracy_reward/centered_abs_mean": 0.15254991352558137, "signal/accuracy_reward/group_std_mean": 0.20258214175701142, "signal/accuracy_reward/group_zero_std_frac": 0.4194444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07627495676279068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07627495676279068, "signal/advantage_abs_mean": 0.08828288316726685, "signal/advantage_pre_scale_abs_mean": 0.08828288316726685, "signal/advantage_pre_scale_std": 0.15849795639514924, "signal/advantage_std": 0.15849795639514924, "signal/brier_reward/centered_abs_mean": 0.13586993813514708, "signal/brier_reward/group_std_mean": 0.17457389533519746, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013586993515491485, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013586993515491485, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03803398087620735, "signal/confidence_uniqueness_reward/group_std_mean": 0.05799528583884239, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038033980876207353, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038033980876207353, "signal/format_reward/centered_abs_mean": 0.018071831949055196, "signal/format_reward/group_std_mean": 0.03415331579744816, "signal/format_reward/group_zero_std_frac": 0.8611111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009035915974527598, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009035915974527598, "signal/frontier_coverage_0/centered_abs_mean": 0.11883060038089752, "signal/frontier_coverage_0/group_std_mean": 0.16314986646175383, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016992775024846196, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016992775024846196, "signal/frontier_coverage_1/centered_abs_mean": 0.11883060038089752, "signal/frontier_coverage_1/group_std_mean": 0.16314986646175383, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016992775024846196, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016992775024846196, "signal/frontier_coverage_10/centered_abs_mean": 0.11883060038089752, "signal/frontier_coverage_10/group_std_mean": 0.16314986646175383, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016992775024846196, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016992775024846196, "signal/frontier_coverage_15/centered_abs_mean": 0.11883060038089752, "signal/frontier_coverage_15/group_std_mean": 0.16314986646175383, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016992775024846196, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016992775024846196, "signal/frontier_coverage_20/centered_abs_mean": 0.11883060038089752, "signal/frontier_coverage_20/group_std_mean": 0.16314986646175383, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016992775024846196, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016992775024846196, "signal/frontier_coverage_25/centered_abs_mean": 0.11883060038089752, "signal/frontier_coverage_25/group_std_mean": 0.16314986646175383, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016992775024846196, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016992775024846196, "signal/frontier_coverage_5/centered_abs_mean": 0.11883060038089752, "signal/frontier_coverage_5/group_std_mean": 0.16314986646175383, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016992775024846196, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016992775024846196, "step": 70 }, { "calibration/aurc": 0.2161523716364652, "calibration/batch_distribution_entropy": 0.8359568738154429, "calibration/buffer_distribution_entropy": 0.7416488545921555, "calibration/confidence_entropy": 0.4851037077204311, "calibration/coverage@0%": 0.001058201058201058, "calibration/coverage@1%": 0.001058201058201058, "calibration/coverage@10%": 0.19976453641375108, "calibration/coverage@15%": 0.33732977608298814, "calibration/coverage@20%": 0.5214813130814425, "calibration/coverage@25%": 0.6504089077351132, "calibration/coverage@30%": 0.7415668559271692, "calibration/coverage@5%": 0.08535139477547854, "calibration/ece": 0.1188181935086359, "calibration/mean_confidence": 0.7158355008307618, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004513888888888884, "completions/max_length": 3429.2, "completions/max_terminated_length": 3429.2, "completions/mean_length": 726.0884521484375, "completions/mean_terminated_length": 729.3609375, "completions/min_length": 0.0, "completions/min_terminated_length": 211.8, "epoch": 0.17999775002812465, "grad_norm": 0.00046963320346549153, "learning_rate": 4.006024096385543e-06, "loss": -0.0016, "num_tokens": 152189948.0, "reward": 1.0248547196388245, "reward_std": 0.11748676300048828, "rewards/accuracy_reward": 0.7057291746139527, "rewards/brier_reward": 0.8091526508331299, "rewards/confidence_uniqueness_reward": 0.9357035756111145, "rewards/format_reward": 0.9953992962837219, "rewards/frontier_coverage_0": -0.0019492823630571365, "rewards/frontier_coverage_1": -0.0019492823630571365, "rewards/frontier_coverage_10": -0.0019492823630571365, "rewards/frontier_coverage_15": -0.0019492823630571365, "rewards/frontier_coverage_20": -0.0019492823630571365, "rewards/frontier_coverage_25": -0.0019492823630571365, "rewards/frontier_coverage_5": -0.0019492823630571365, "signal/accuracy_reward/centered_abs_mean": 0.1632920980453491, "signal/accuracy_reward/group_std_mean": 0.20991926789283752, "signal/accuracy_reward/group_zero_std_frac": 0.42777777910232545, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08164604902267455, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08164604902267455, "signal/advantage_abs_mean": 0.08842966109514236, "signal/advantage_pre_scale_abs_mean": 0.08842966109514236, "signal/advantage_pre_scale_std": 0.15177057385444642, "signal/advantage_std": 0.15177057385444642, "signal/brier_reward/centered_abs_mean": 0.12017300575971604, "signal/brier_reward/group_std_mean": 0.15719916820526122, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012017300724983216, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012017300724983216, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03090880624949932, "signal/confidence_uniqueness_reward/group_std_mean": 0.046534180641174316, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030908805783838034, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030908805783838034, "signal/format_reward/centered_abs_mean": 0.008512369729578495, "signal/format_reward/group_std_mean": 0.01946439780294895, "signal/format_reward/group_zero_std_frac": 0.9083333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004256184864789248, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004256184864789248, "signal/frontier_coverage_0/centered_abs_mean": 0.10867546051740647, "signal/frontier_coverage_0/group_std_mean": 0.14882293045520784, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015540590975433588, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015540590975433588, "signal/frontier_coverage_1/centered_abs_mean": 0.10867546051740647, "signal/frontier_coverage_1/group_std_mean": 0.14882293045520784, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015540590975433588, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015540590975433588, "signal/frontier_coverage_10/centered_abs_mean": 0.10867546051740647, "signal/frontier_coverage_10/group_std_mean": 0.14882293045520784, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015540590975433588, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015540590975433588, "signal/frontier_coverage_15/centered_abs_mean": 0.10867546051740647, "signal/frontier_coverage_15/group_std_mean": 0.14882293045520784, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015540590975433588, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015540590975433588, "signal/frontier_coverage_20/centered_abs_mean": 0.10867546051740647, "signal/frontier_coverage_20/group_std_mean": 0.14882293045520784, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015540590975433588, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015540590975433588, "signal/frontier_coverage_25/centered_abs_mean": 0.10867546051740647, "signal/frontier_coverage_25/group_std_mean": 0.14882293045520784, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015540590975433588, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015540590975433588, "signal/frontier_coverage_5/centered_abs_mean": 0.10867546051740647, "signal/frontier_coverage_5/group_std_mean": 0.14882293045520784, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015540590975433588, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015540590975433588, "step": 75 }, { "calibration/aurc": 0.18651022352019814, "calibration/batch_distribution_entropy": 0.7930350849715844, "calibration/buffer_distribution_entropy": 0.7505434491465455, "calibration/confidence_entropy": 0.43033643310086267, "calibration/coverage@0%": 0.020376741516687377, "calibration/coverage@1%": 0.020376741516687377, "calibration/coverage@10%": 0.28033401661756563, "calibration/coverage@15%": 0.3723768532157701, "calibration/coverage@20%": 0.6270485291790584, "calibration/coverage@25%": 0.6903603691654683, "calibration/coverage@30%": 0.8499867357339328, "calibration/coverage@5%": 0.18643418276995108, "calibration/ece": 0.15777915022958544, "calibration/mean_confidence": 0.7495933013222421, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006770833333333304, "completions/max_length": 3568.8, "completions/max_terminated_length": 3568.8, "completions/mean_length": 770.2166625976563, "completions/mean_terminated_length": 775.5416259765625, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.19199760002999963, "grad_norm": 0.0005624489858746529, "learning_rate": 3.855421686746989e-06, "loss": -0.0061, "num_tokens": 164116124.0, "reward": 1.008245551586151, "reward_std": 0.11997615545988083, "rewards/accuracy_reward": 0.6761284828186035, "rewards/brier_reward": 0.7941550731658935, "rewards/confidence_uniqueness_reward": 0.9332335710525512, "rewards/format_reward": 0.9932291626930236, "rewards/frontier_coverage_0": 0.008270517364144326, "rewards/frontier_coverage_1": 0.008270517364144326, "rewards/frontier_coverage_10": 0.008270517364144326, "rewards/frontier_coverage_15": 0.008270517364144326, "rewards/frontier_coverage_20": 0.008270517364144326, "rewards/frontier_coverage_25": 0.008270517364144326, "rewards/frontier_coverage_5": 0.008270517364144326, "signal/accuracy_reward/centered_abs_mean": 0.15729708969593048, "signal/accuracy_reward/group_std_mean": 0.21044376790523528, "signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07864854484796524, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07864854484796524, "signal/advantage_abs_mean": 0.08815103322267533, "signal/advantage_pre_scale_abs_mean": 0.08815103322267533, "signal/advantage_pre_scale_std": 0.15297050178050994, "signal/advantage_std": 0.15297050178050994, "signal/brier_reward/centered_abs_mean": 0.1248743325471878, "signal/brier_reward/group_std_mean": 0.16186331510543822, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012487433291971684, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012487433291971684, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.034130534902215, "signal/confidence_uniqueness_reward/group_std_mean": 0.049203697592020035, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034130535554140807, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034130535554140807, "signal/format_reward/centered_abs_mean": 0.011599392537027598, "signal/format_reward/group_std_mean": 0.021724069118499757, "signal/format_reward/group_zero_std_frac": 0.9111111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005799696268513799, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005799696268513799, "signal/frontier_coverage_0/centered_abs_mean": 0.10307898521423339, "signal/frontier_coverage_0/group_std_mean": 0.143079274892807, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014740294544026255, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014740294544026255, "signal/frontier_coverage_1/centered_abs_mean": 0.10307898521423339, "signal/frontier_coverage_1/group_std_mean": 0.143079274892807, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014740294544026255, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014740294544026255, "signal/frontier_coverage_10/centered_abs_mean": 0.10307898521423339, "signal/frontier_coverage_10/group_std_mean": 0.143079274892807, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014740294544026255, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014740294544026255, "signal/frontier_coverage_15/centered_abs_mean": 0.10307898521423339, "signal/frontier_coverage_15/group_std_mean": 0.143079274892807, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014740294544026255, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014740294544026255, "signal/frontier_coverage_20/centered_abs_mean": 0.10307898521423339, "signal/frontier_coverage_20/group_std_mean": 0.143079274892807, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014740294544026255, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014740294544026255, "signal/frontier_coverage_25/centered_abs_mean": 0.10307898521423339, "signal/frontier_coverage_25/group_std_mean": 0.143079274892807, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014740294544026255, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014740294544026255, "signal/frontier_coverage_5/centered_abs_mean": 0.10307898521423339, "signal/frontier_coverage_5/group_std_mean": 0.143079274892807, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014740294544026255, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014740294544026255, "step": 80 }, { "calibration/aurc": 0.1885673146932413, "calibration/batch_distribution_entropy": 0.7733087879174113, "calibration/buffer_distribution_entropy": 0.7553238636343612, "calibration/confidence_entropy": 0.40886239649926487, "calibration/coverage@0%": 0.009919262309161616, "calibration/coverage@1%": 0.009919262309161616, "calibration/coverage@10%": 0.15248443679487708, "calibration/coverage@15%": 0.37427037556169357, "calibration/coverage@20%": 0.5941326314740762, "calibration/coverage@25%": 0.7604391622349562, "calibration/coverage@30%": 0.871613869192398, "calibration/coverage@5%": 0.027166753583158128, "calibration/ece": 0.12875506861667763, "calibration/mean_confidence": 0.7515428029706879, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003732638888888906, "completions/max_length": 3308.4, "completions/max_terminated_length": 3308.4, "completions/mean_length": 760.9641723632812, "completions/mean_terminated_length": 763.8289794921875, "completions/min_length": 0.0, "completions/min_terminated_length": 234.6, "epoch": 0.2039974500318746, "grad_norm": 0.0005237568984739482, "learning_rate": 3.7048192771084342e-06, "loss": -0.0019, "num_tokens": 175969631.0, "reward": 1.0170960187911988, "reward_std": 0.1100090652704239, "rewards/accuracy_reward": 0.6894097208976746, "rewards/brier_reward": 0.8078195929527283, "rewards/confidence_uniqueness_reward": 0.919755506515503, "rewards/format_reward": 0.9962673664093018, "rewards/frontier_coverage_0": 0.014984596229624003, "rewards/frontier_coverage_1": 0.014984596229624003, "rewards/frontier_coverage_10": 0.014984596229624003, "rewards/frontier_coverage_15": 0.014984596229624003, "rewards/frontier_coverage_20": 0.014984596229624003, "rewards/frontier_coverage_25": 0.014984596229624003, "rewards/frontier_coverage_5": 0.014984596229624003, "signal/accuracy_reward/centered_abs_mean": 0.15218099057674409, "signal/accuracy_reward/group_std_mean": 0.19950250685214996, "signal/accuracy_reward/group_zero_std_frac": 0.43055556416511537, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07609049528837204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07609049528837204, "signal/advantage_abs_mean": 0.08195096254348755, "signal/advantage_pre_scale_abs_mean": 0.08195096254348755, "signal/advantage_pre_scale_std": 0.1454104334115982, "signal/advantage_std": 0.1454104334115982, "signal/brier_reward/centered_abs_mean": 0.11562369614839554, "signal/brier_reward/group_std_mean": 0.15216899812221527, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011562369205057621, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011562369205057621, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04034182578325272, "signal/confidence_uniqueness_reward/group_std_mean": 0.0564569778740406, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004034182662144304, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004034182662144304, "signal/format_reward/centered_abs_mean": 0.006960720429196954, "signal/format_reward/group_std_mean": 0.016455814242362976, "signal/format_reward/group_zero_std_frac": 0.919444465637207, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003480360214598477, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003480360214598477, "signal/frontier_coverage_0/centered_abs_mean": 0.09572111815214157, "signal/frontier_coverage_0/group_std_mean": 0.13204465210437774, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013688119826838375, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013688119826838375, "signal/frontier_coverage_1/centered_abs_mean": 0.09572111815214157, "signal/frontier_coverage_1/group_std_mean": 0.13204465210437774, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013688119826838375, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013688119826838375, "signal/frontier_coverage_10/centered_abs_mean": 0.09572111815214157, "signal/frontier_coverage_10/group_std_mean": 0.13204465210437774, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013688119826838375, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013688119826838375, "signal/frontier_coverage_15/centered_abs_mean": 0.09572111815214157, "signal/frontier_coverage_15/group_std_mean": 0.13204465210437774, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013688119826838375, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013688119826838375, "signal/frontier_coverage_20/centered_abs_mean": 0.09572111815214157, "signal/frontier_coverage_20/group_std_mean": 0.13204465210437774, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013688119826838375, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013688119826838375, "signal/frontier_coverage_25/centered_abs_mean": 0.09572111815214157, "signal/frontier_coverage_25/group_std_mean": 0.13204465210437774, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013688119826838375, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013688119826838375, "signal/frontier_coverage_5/centered_abs_mean": 0.09572111815214157, "signal/frontier_coverage_5/group_std_mean": 0.13204465210437774, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013688119826838375, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013688119826838375, "step": 85 }, { "calibration/aurc": 0.13547219535403135, "calibration/batch_distribution_entropy": 0.8332040774364462, "calibration/buffer_distribution_entropy": 0.7601464935483995, "calibration/confidence_entropy": 0.4530539299800417, "calibration/coverage@0%": 0.07140748031496062, "calibration/coverage@1%": 0.09484498031496062, "calibration/coverage@10%": 0.49112532808398945, "calibration/coverage@15%": 0.6052944553805775, "calibration/coverage@20%": 0.7221538713910761, "calibration/coverage@25%": 0.8530511811023622, "calibration/coverage@30%": 0.9115731627296588, "calibration/coverage@5%": 0.2908136482939633, "calibration/ece": 0.09694233170413263, "calibration/mean_confidence": 0.6963118038044955, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003472222222222232, "completions/max_length": 3379.0, "completions/max_terminated_length": 3379.0, "completions/mean_length": 740.676220703125, "completions/mean_terminated_length": 743.2724243164063, "completions/min_length": 0.0, "completions/min_terminated_length": 210.8, "epoch": 0.2159973000337496, "grad_norm": 0.0005956885870546103, "learning_rate": 3.5542168674698798e-06, "loss": -0.0022, "num_tokens": 187570893.0, "reward": 1.0165443658828734, "reward_std": 0.11067529022693634, "rewards/accuracy_reward": 0.6825520873069764, "rewards/brier_reward": 0.8147860646247864, "rewards/confidence_uniqueness_reward": 0.935043203830719, "rewards/format_reward": 0.9965277791023255, "rewards/frontier_coverage_0": 0.020194912049919366, "rewards/frontier_coverage_1": 0.020194912049919366, "rewards/frontier_coverage_10": 0.020194912049919366, "rewards/frontier_coverage_15": 0.020194912049919366, "rewards/frontier_coverage_20": 0.020194912049919366, "rewards/frontier_coverage_25": 0.020194912049919366, "rewards/frontier_coverage_5": 0.020194912049919366, "signal/accuracy_reward/centered_abs_mean": 0.1577311247587204, "signal/accuracy_reward/group_std_mean": 0.20566980242729188, "signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0788655623793602, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0788655623793602, "signal/advantage_abs_mean": 0.08244062066078187, "signal/advantage_pre_scale_abs_mean": 0.08244062066078187, "signal/advantage_pre_scale_std": 0.14315189719200133, "signal/advantage_std": 0.14315189719200133, "signal/brier_reward/centered_abs_mean": 0.11458506137132644, "signal/brier_reward/group_std_mean": 0.15111578106880189, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011458505876362324, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011458505876362324, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03055480159819126, "signal/confidence_uniqueness_reward/group_std_mean": 0.04391605779528618, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003055480308830738, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003055480308830738, "signal/format_reward/centered_abs_mean": 0.00646701380610466, "signal/format_reward/group_std_mean": 0.014765101112425327, "signal/format_reward/group_zero_std_frac": 0.9305555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00323350690305233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00323350690305233, "signal/frontier_coverage_0/centered_abs_mean": 0.11731700897216797, "signal/frontier_coverage_0/group_std_mean": 0.1610626608133316, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016776332166045904, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016776332166045904, "signal/frontier_coverage_1/centered_abs_mean": 0.11731700897216797, "signal/frontier_coverage_1/group_std_mean": 0.1610626608133316, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016776332166045904, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016776332166045904, "signal/frontier_coverage_10/centered_abs_mean": 0.11731700897216797, "signal/frontier_coverage_10/group_std_mean": 0.1610626608133316, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016776332166045904, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016776332166045904, "signal/frontier_coverage_15/centered_abs_mean": 0.11731700897216797, "signal/frontier_coverage_15/group_std_mean": 0.1610626608133316, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016776332166045904, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016776332166045904, "signal/frontier_coverage_20/centered_abs_mean": 0.11731700897216797, "signal/frontier_coverage_20/group_std_mean": 0.1610626608133316, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016776332166045904, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016776332166045904, "signal/frontier_coverage_25/centered_abs_mean": 0.11731700897216797, "signal/frontier_coverage_25/group_std_mean": 0.1610626608133316, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016776332166045904, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016776332166045904, "signal/frontier_coverage_5/centered_abs_mean": 0.11731700897216797, "signal/frontier_coverage_5/group_std_mean": 0.1610626608133316, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016776332166045904, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016776332166045904, "step": 90 }, { "calibration/aurc": 0.19124679587417506, "calibration/batch_distribution_entropy": 0.8975058875243824, "calibration/buffer_distribution_entropy": 0.7743957879617259, "calibration/confidence_entropy": 0.48854993876895064, "calibration/coverage@0%": 0.04803693795089366, "calibration/coverage@1%": 0.08241193795089366, "calibration/coverage@10%": 0.38390832849323486, "calibration/coverage@15%": 0.5201580258706102, "calibration/coverage@20%": 0.6464850084397695, "calibration/coverage@25%": 0.6967907865926141, "calibration/coverage@30%": 0.7429901698420995, "calibration/coverage@5%": 0.16712853853938087, "calibration/ece": 0.1174216128367086, "calibration/mean_confidence": 0.624049445549762, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003819444444444442, "completions/max_length": 3475.2, "completions/max_terminated_length": 3475.2, "completions/mean_length": 731.6876953125, "completions/mean_terminated_length": 734.4987670898438, "completions/min_length": 0.0, "completions/min_terminated_length": 182.4, "epoch": 0.22799715003562457, "grad_norm": 0.0005185811896808445, "learning_rate": 3.4036144578313257e-06, "loss": -0.0028, "num_tokens": 199091615.0, "reward": 1.0106172680854797, "reward_std": 0.10462814420461655, "rewards/accuracy_reward": 0.6690972208976745, "rewards/brier_reward": 0.814009141921997, "rewards/confidence_uniqueness_reward": 0.9448173999786377, "rewards/format_reward": 0.9961805462837219, "rewards/frontier_coverage_0": 0.020936440164223313, "rewards/frontier_coverage_1": 0.020936440164223313, "rewards/frontier_coverage_10": 0.020936440164223313, "rewards/frontier_coverage_15": 0.020936440164223313, "rewards/frontier_coverage_20": 0.020936440164223313, "rewards/frontier_coverage_25": 0.020936440164223313, "rewards/frontier_coverage_5": 0.020936440164223313, "signal/accuracy_reward/centered_abs_mean": 0.14444444328546524, "signal/accuracy_reward/group_std_mean": 0.19350114166736604, "signal/accuracy_reward/group_zero_std_frac": 0.44166667461395265, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07222222164273262, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07222222164273262, "signal/advantage_abs_mean": 0.0762260913848877, "signal/advantage_pre_scale_abs_mean": 0.0762260913848877, "signal/advantage_pre_scale_std": 0.13657819628715515, "signal/advantage_std": 0.13657819628715515, "signal/brier_reward/centered_abs_mean": 0.11326702684164047, "signal/brier_reward/group_std_mean": 0.14780981540679933, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011326702870428562, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011326702870428562, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027253608033061028, "signal/confidence_uniqueness_reward/group_std_mean": 0.04070703834295273, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027253609616309403, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027253609616309403, "signal/format_reward/centered_abs_mean": 0.00711805559694767, "signal/format_reward/group_std_mean": 0.016377711296081544, "signal/format_reward/group_zero_std_frac": 0.9222222447395325, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003559027798473835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003559027798473835, "signal/frontier_coverage_0/centered_abs_mean": 0.13173725605010986, "signal/frontier_coverage_0/group_std_mean": 0.1795190155506134, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001883842796087265, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001883842796087265, "signal/frontier_coverage_1/centered_abs_mean": 0.13173725605010986, "signal/frontier_coverage_1/group_std_mean": 0.1795190155506134, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001883842796087265, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001883842796087265, "signal/frontier_coverage_10/centered_abs_mean": 0.13173725605010986, "signal/frontier_coverage_10/group_std_mean": 0.1795190155506134, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001883842796087265, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001883842796087265, "signal/frontier_coverage_15/centered_abs_mean": 0.13173725605010986, "signal/frontier_coverage_15/group_std_mean": 0.1795190155506134, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001883842796087265, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001883842796087265, "signal/frontier_coverage_20/centered_abs_mean": 0.13173725605010986, "signal/frontier_coverage_20/group_std_mean": 0.1795190155506134, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001883842796087265, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001883842796087265, "signal/frontier_coverage_25/centered_abs_mean": 0.13173725605010986, "signal/frontier_coverage_25/group_std_mean": 0.1795190155506134, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001883842796087265, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001883842796087265, "signal/frontier_coverage_5/centered_abs_mean": 0.13173725605010986, "signal/frontier_coverage_5/group_std_mean": 0.1795190155506134, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001883842796087265, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001883842796087265, "step": 95 }, { "calibration/aurc": 0.18834491364983547, "calibration/batch_distribution_entropy": 0.8389232639652772, "calibration/buffer_distribution_entropy": 0.7841595452485729, "calibration/confidence_entropy": 0.4508094524363834, "calibration/coverage@0%": 0.012541010611400316, "calibration/coverage@1%": 0.012541010611400316, "calibration/coverage@10%": 0.23095183433959976, "calibration/coverage@15%": 0.41025999431848914, "calibration/coverage@20%": 0.6366873761277325, "calibration/coverage@25%": 0.8210446158298123, "calibration/coverage@30%": 0.8806853070175439, "calibration/coverage@5%": 0.08330307348017359, "calibration/ece": 0.12463181735349024, "calibration/mean_confidence": 0.7062022295145554, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004861111111111116, "completions/max_length": 3220.2, "completions/max_terminated_length": 3220.2, "completions/mean_length": 718.14775390625, "completions/mean_terminated_length": 721.6147583007812, "completions/min_length": 0.0, "completions/min_terminated_length": 209.4, "epoch": 0.23999700003749952, "grad_norm": 0.000564222689718008, "learning_rate": 3.2530120481927713e-06, "loss": -0.003, "num_tokens": 210463749.0, "reward": 1.0169752597808839, "reward_std": 0.1152550533413887, "rewards/accuracy_reward": 0.678819453716278, "rewards/brier_reward": 0.8254865527153015, "rewards/confidence_uniqueness_reward": 0.9365423917770386, "rewards/format_reward": 0.9946180582046509, "rewards/frontier_coverage_0": 0.040495523065328595, "rewards/frontier_coverage_1": 0.040495523065328595, "rewards/frontier_coverage_10": 0.040495523065328595, "rewards/frontier_coverage_15": 0.040495523065328595, "rewards/frontier_coverage_20": 0.040495523065328595, "rewards/frontier_coverage_25": 0.040495523065328595, "rewards/frontier_coverage_5": 0.040495523065328595, "signal/accuracy_reward/centered_abs_mean": 0.15894097089767456, "signal/accuracy_reward/group_std_mean": 0.2088989406824112, "signal/accuracy_reward/group_zero_std_frac": 0.4111111104488373, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07947048544883728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07947048544883728, "signal/advantage_abs_mean": 0.08560490906238556, "signal/advantage_pre_scale_abs_mean": 0.08560490906238556, "signal/advantage_pre_scale_std": 0.14947248101234437, "signal/advantage_std": 0.14947248101234437, "signal/brier_reward/centered_abs_mean": 0.12055338621139526, "signal/brier_reward/group_std_mean": 0.15626082718372344, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012055338732898236, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012055338732898236, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03030674159526825, "signal/confidence_uniqueness_reward/group_std_mean": 0.04416131526231766, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030306743923574687, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030306743923574687, "signal/format_reward/centered_abs_mean": 0.009190538339316845, "signal/format_reward/group_std_mean": 0.018485058657824995, "signal/format_reward/group_zero_std_frac": 0.919444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004595269169658423, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004595269169658423, "signal/frontier_coverage_0/centered_abs_mean": 0.11489113122224807, "signal/frontier_coverage_0/group_std_mean": 0.15738584399223327, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016429432900622488, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016429432900622488, "signal/frontier_coverage_1/centered_abs_mean": 0.11489113122224807, "signal/frontier_coverage_1/group_std_mean": 0.15738584399223327, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016429432900622488, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016429432900622488, "signal/frontier_coverage_10/centered_abs_mean": 0.11489113122224807, "signal/frontier_coverage_10/group_std_mean": 0.15738584399223327, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016429432900622488, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016429432900622488, "signal/frontier_coverage_15/centered_abs_mean": 0.11489113122224807, "signal/frontier_coverage_15/group_std_mean": 0.15738584399223327, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016429432900622488, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016429432900622488, "signal/frontier_coverage_20/centered_abs_mean": 0.11489113122224807, "signal/frontier_coverage_20/group_std_mean": 0.15738584399223327, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016429432900622488, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016429432900622488, "signal/frontier_coverage_25/centered_abs_mean": 0.11489113122224807, "signal/frontier_coverage_25/group_std_mean": 0.15738584399223327, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016429432900622488, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016429432900622488, "signal/frontier_coverage_5/centered_abs_mean": 0.11489113122224807, "signal/frontier_coverage_5/group_std_mean": 0.15738584399223327, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016429432900622488, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016429432900622488, "step": 100 }, { "epoch": 0.23999700003749952, "eval_calibration/aurc": 0.14868938105166316, "eval_calibration/batch_distribution_entropy": 0.7457575510587467, "eval_calibration/buffer_distribution_entropy": 0.7879834341654887, "eval_calibration/confidence_entropy": 0.40222145732977205, "eval_calibration/coverage@0%": 0.2552083333333333, "eval_calibration/coverage@1%": 0.2552083333333333, "eval_calibration/coverage@10%": 0.4583333333333333, "eval_calibration/coverage@15%": 0.5260416666666666, "eval_calibration/coverage@20%": 0.7604166666666666, "eval_calibration/coverage@25%": 0.8854166666666666, "eval_calibration/coverage@30%": 0.9583333333333334, "eval_calibration/coverage@5%": 0.2916666666666667, "eval_calibration/ece": 0.17504294376882312, "eval_calibration/mean_confidence": 0.754312944601324, "eval_completions/clipped_ratio": 0.004340277777777772, "eval_completions/max_length": 2818.6666666666665, "eval_completions/max_terminated_length": 2818.6666666666665, "eval_completions/mean_length": 706.1878763834635, "eval_completions/mean_terminated_length": 709.2273864746094, "eval_completions/min_length": 131.5, "eval_completions/min_terminated_length": 234.5, "eval_loss": 0.0, "eval_num_tokens": 210463749.0, "eval_reward": 1.0165491104125977, "eval_reward_std": 0.24284635484218597, "eval_rewards/accuracy_reward": 0.6935763855775198, "eval_rewards/brier_reward": 0.8220989306767782, "eval_rewards/confidence_uniqueness_reward": 0.8749090135097504, "eval_rewards/format_reward": 0.9939236044883728, "eval_rewards/frontier_coverage_0": 0.03095191267008583, "eval_rewards/frontier_coverage_1": 0.03095191267008583, "eval_rewards/frontier_coverage_10": 0.03095191267008583, "eval_rewards/frontier_coverage_15": 0.03095191267008583, "eval_rewards/frontier_coverage_20": 0.03095191267008583, "eval_rewards/frontier_coverage_25": 0.03095191267008583, "eval_rewards/frontier_coverage_5": 0.03095191267008583, "eval_runtime": 185.5248, "eval_samples_per_second": 5.39, "eval_signal/accuracy_reward/centered_abs_mean": 0.4078233540058136, "eval_signal/accuracy_reward/group_std_mean": 0.4573976546525955, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2039116770029068, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2039116770029068, "eval_signal/advantage_abs_mean": 0.2098855177561442, "eval_signal/advantage_pre_scale_abs_mean": 0.2098855177561442, "eval_signal/advantage_pre_scale_std": 0.24122367799282074, "eval_signal/advantage_std": 0.24122367799282074, "eval_signal/brier_reward/centered_abs_mean": 0.20792252322038016, "eval_signal/brier_reward/group_std_mean": 0.26402051995197934, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02079225331544876, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02079225331544876, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05713109113276005, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08232143521308899, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005713109392672777, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005713109392672777, "eval_signal/format_reward/centered_abs_mean": 0.011773003110041222, "eval_signal/format_reward/group_std_mean": 0.034373246443768345, "eval_signal/format_reward/group_zero_std_frac": 0.8055555721124014, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.005886501555020611, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.005886501555020611, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.13855222860972086, "eval_signal/frontier_coverage_0/group_std_mean": 0.23446151365836462, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019812969064029553, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019812969064029553, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.13855222860972086, "eval_signal/frontier_coverage_1/group_std_mean": 0.23446151365836462, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019812969064029553, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019812969064029553, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.13855222860972086, "eval_signal/frontier_coverage_10/group_std_mean": 0.23446151365836462, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019812969064029553, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019812969064029553, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.13855222860972086, "eval_signal/frontier_coverage_15/group_std_mean": 0.23446151365836462, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019812969064029553, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019812969064029553, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.13855222860972086, "eval_signal/frontier_coverage_20/group_std_mean": 0.23446151365836462, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019812969064029553, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019812969064029553, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.13855222860972086, "eval_signal/frontier_coverage_25/group_std_mean": 0.23446151365836462, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019812969064029553, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019812969064029553, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.13855222860972086, "eval_signal/frontier_coverage_5/group_std_mean": 0.23446151365836462, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019812969064029553, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019812969064029553, "eval_steps_per_second": 0.032, "step": 100 }, { "calibration/aurc": 0.2962014591980389, "calibration/batch_distribution_entropy": 0.7614938360584489, "calibration/buffer_distribution_entropy": 0.7884020557358128, "calibration/confidence_entropy": 0.4082213590920844, "calibration/coverage@0%": 0.006255439512619669, "calibration/coverage@1%": 0.006255439512619669, "calibration/coverage@10%": 0.12406077748767044, "calibration/coverage@15%": 0.14494850594720046, "calibration/coverage@20%": 0.34461551711633304, "calibration/coverage@25%": 0.4896351900203076, "calibration/coverage@30%": 0.5572739882506528, "calibration/coverage@5%": 0.07414055700609226, "calibration/ece": 0.18316298996542563, "calibration/mean_confidence": 0.7656324318669362, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004947916666666674, "completions/max_length": 3749.6, "completions/max_terminated_length": 3749.6, "completions/mean_length": 706.183251953125, "completions/mean_terminated_length": 709.6047241210938, "completions/min_length": 0.0, "completions/min_terminated_length": 195.8, "epoch": 0.2519968500393745, "grad_norm": 0.0007198955281637609, "learning_rate": 3.1024096385542172e-06, "loss": -0.0035, "num_tokens": 221675844.0, "reward": 1.012697958946228, "reward_std": 0.11043240427970887, "rewards/accuracy_reward": 0.6752604126930237, "rewards/brier_reward": 0.8130281567573547, "rewards/confidence_uniqueness_reward": 0.9284329533576965, "rewards/format_reward": 0.9948784708976746, "rewards/frontier_coverage_0": 0.03478905353695154, "rewards/frontier_coverage_1": 0.03478905353695154, "rewards/frontier_coverage_10": 0.03478905353695154, "rewards/frontier_coverage_15": 0.03478905353695154, "rewards/frontier_coverage_20": 0.03478905353695154, "rewards/frontier_coverage_25": 0.03478905353695154, "rewards/frontier_coverage_5": 0.03478905353695154, "signal/accuracy_reward/centered_abs_mean": 0.1487250432372093, "signal/accuracy_reward/group_std_mean": 0.19329376816749572, "signal/accuracy_reward/group_zero_std_frac": 0.4666666805744171, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07436252161860465, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07436252161860465, "signal/advantage_abs_mean": 0.08278846144676208, "signal/advantage_pre_scale_abs_mean": 0.08278846144676208, "signal/advantage_pre_scale_std": 0.14614979028701783, "signal/advantage_std": 0.14614979028701783, "signal/brier_reward/centered_abs_mean": 0.12054271399974822, "signal/brier_reward/group_std_mean": 0.15707246363162994, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01205427125096321, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01205427125096321, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03356652893126011, "signal/confidence_uniqueness_reward/group_std_mean": 0.04841043651103973, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003356653032824397, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003356653032824397, "signal/format_reward/centered_abs_mean": 0.007872178638353943, "signal/format_reward/group_std_mean": 0.017148750275373457, "signal/format_reward/group_zero_std_frac": 0.919444465637207, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0039360893191769716, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0039360893191769716, "signal/frontier_coverage_0/centered_abs_mean": 0.10029449313879013, "signal/frontier_coverage_0/group_std_mean": 0.1406713530421257, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014342111768200993, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014342111768200993, "signal/frontier_coverage_1/centered_abs_mean": 0.10029449313879013, "signal/frontier_coverage_1/group_std_mean": 0.1406713530421257, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014342111768200993, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014342111768200993, "signal/frontier_coverage_10/centered_abs_mean": 0.10029449313879013, "signal/frontier_coverage_10/group_std_mean": 0.1406713530421257, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014342111768200993, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014342111768200993, "signal/frontier_coverage_15/centered_abs_mean": 0.10029449313879013, "signal/frontier_coverage_15/group_std_mean": 0.1406713530421257, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014342111768200993, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014342111768200993, "signal/frontier_coverage_20/centered_abs_mean": 0.10029449313879013, "signal/frontier_coverage_20/group_std_mean": 0.1406713530421257, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014342111768200993, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014342111768200993, "signal/frontier_coverage_25/centered_abs_mean": 0.10029449313879013, "signal/frontier_coverage_25/group_std_mean": 0.1406713530421257, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014342111768200993, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014342111768200993, "signal/frontier_coverage_5/centered_abs_mean": 0.10029449313879013, "signal/frontier_coverage_5/group_std_mean": 0.1406713530421257, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014342111768200993, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014342111768200993, "step": 105 }, { "calibration/aurc": 0.13740946154687023, "calibration/batch_distribution_entropy": 0.7387166352713965, "calibration/buffer_distribution_entropy": 0.7875437463831182, "calibration/confidence_entropy": 0.3854279778031112, "calibration/coverage@0%": 0.005734620418848168, "calibration/coverage@1%": 0.005734620418848168, "calibration/coverage@10%": 0.4128860345212545, "calibration/coverage@15%": 0.639678830954757, "calibration/coverage@20%": 0.7715804973821989, "calibration/coverage@25%": 0.8597158595113438, "calibration/coverage@30%": 0.9363928883071553, "calibration/coverage@5%": 0.24802201094509685, "calibration/ece": 0.10715767769046516, "calibration/mean_confidence": 0.7670220325744023, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0035590277777777677, "completions/max_length": 3449.4, "completions/max_terminated_length": 3449.4, "completions/mean_length": 713.4057373046875, "completions/mean_terminated_length": 715.9921508789063, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.2639967000412495, "grad_norm": 0.0006309704622253776, "learning_rate": 2.9518072289156627e-06, "loss": -0.0015, "num_tokens": 233002726.0, "reward": 1.0424779415130616, "reward_std": 0.10481331050395966, "rewards/accuracy_reward": 0.7316840291023254, "rewards/brier_reward": 0.8372745990753174, "rewards/confidence_uniqueness_reward": 0.9280092358589173, "rewards/format_reward": 0.99609375, "rewards/frontier_coverage_0": 0.020585645362734793, "rewards/frontier_coverage_1": 0.020585645362734793, "rewards/frontier_coverage_10": 0.020585645362734793, "rewards/frontier_coverage_15": 0.020585645362734793, "rewards/frontier_coverage_20": 0.020585645362734793, "rewards/frontier_coverage_25": 0.020585645362734793, "rewards/frontier_coverage_5": 0.020585645362734793, "signal/accuracy_reward/centered_abs_mean": 0.13774413764476776, "signal/accuracy_reward/group_std_mean": 0.1850327730178833, "signal/accuracy_reward/group_zero_std_frac": 0.45555556416511533, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06887206882238388, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06887206882238388, "signal/advantage_abs_mean": 0.07664794921875, "signal/advantage_pre_scale_abs_mean": 0.07664794921875, "signal/advantage_pre_scale_std": 0.1405678302049637, "signal/advantage_std": 0.1405678302049637, "signal/brier_reward/centered_abs_mean": 0.11345363408327103, "signal/brier_reward/group_std_mean": 0.15164164900779725, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011345363780856132, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011345363780856132, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.033889131247997285, "signal/confidence_uniqueness_reward/group_std_mean": 0.04748179391026497, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033889132551848888, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033889132551848888, "signal/format_reward/centered_abs_mean": 0.0070800781715661286, "signal/format_reward/group_std_mean": 0.014973613433539867, "signal/format_reward/group_zero_std_frac": 0.9333333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0035400390857830643, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0035400390857830643, "signal/frontier_coverage_0/centered_abs_mean": 0.10066508799791336, "signal/frontier_coverage_0/group_std_mean": 0.14207510650157928, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014395107515156268, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014395107515156268, "signal/frontier_coverage_1/centered_abs_mean": 0.10066508799791336, "signal/frontier_coverage_1/group_std_mean": 0.14207510650157928, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014395107515156268, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014395107515156268, "signal/frontier_coverage_10/centered_abs_mean": 0.10066508799791336, "signal/frontier_coverage_10/group_std_mean": 0.14207510650157928, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014395107515156268, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014395107515156268, "signal/frontier_coverage_15/centered_abs_mean": 0.10066508799791336, "signal/frontier_coverage_15/group_std_mean": 0.14207510650157928, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014395107515156268, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014395107515156268, "signal/frontier_coverage_20/centered_abs_mean": 0.10066508799791336, "signal/frontier_coverage_20/group_std_mean": 0.14207510650157928, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014395107515156268, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014395107515156268, "signal/frontier_coverage_25/centered_abs_mean": 0.10066508799791336, "signal/frontier_coverage_25/group_std_mean": 0.14207510650157928, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014395107515156268, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014395107515156268, "signal/frontier_coverage_5/centered_abs_mean": 0.10066508799791336, "signal/frontier_coverage_5/group_std_mean": 0.14207510650157928, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014395107515156268, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014395107515156268, "step": 110 }, { "calibration/aurc": 0.2610249615426862, "calibration/batch_distribution_entropy": 0.8931124788213323, "calibration/buffer_distribution_entropy": 0.7904711233092072, "calibration/confidence_entropy": 0.4444131127935872, "calibration/coverage@0%": 0.0183356728183972, "calibration/coverage@1%": 0.0183356728183972, "calibration/coverage@10%": 0.1648454105202722, "calibration/coverage@15%": 0.23299114257284842, "calibration/coverage@20%": 0.44790129663351574, "calibration/coverage@25%": 0.5566237309402716, "calibration/coverage@30%": 0.6543754294352068, "calibration/coverage@5%": 0.06807389271368516, "calibration/ece": 0.20020606774460367, "calibration/mean_confidence": 0.6489293143319701, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00512152777777779, "completions/max_length": 3599.4, "completions/max_terminated_length": 3599.4, "completions/mean_length": 718.4827392578125, "completions/mean_terminated_length": 722.2009521484375, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.27599655004312446, "grad_norm": 0.0005341703654266894, "learning_rate": 2.8012048192771087e-06, "loss": -0.0034, "num_tokens": 244358847.0, "reward": 1.0126498460769653, "reward_std": 0.10888111293315887, "rewards/accuracy_reward": 0.6730902791023254, "rewards/brier_reward": 0.8102145433425904, "rewards/confidence_uniqueness_reward": 0.9390350937843323, "rewards/format_reward": 0.9948784708976746, "rewards/frontier_coverage_0": 0.037367334216833116, "rewards/frontier_coverage_1": 0.037367334216833116, "rewards/frontier_coverage_10": 0.037367334216833116, "rewards/frontier_coverage_15": 0.037367334216833116, "rewards/frontier_coverage_20": 0.037367334216833116, "rewards/frontier_coverage_25": 0.037367334216833116, "rewards/frontier_coverage_5": 0.037367334216833116, "signal/accuracy_reward/centered_abs_mean": 0.1502712696790695, "signal/accuracy_reward/group_std_mean": 0.19435304999351502, "signal/accuracy_reward/group_zero_std_frac": 0.46111111640930175, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07513563483953475, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07513563483953475, "signal/advantage_abs_mean": 0.08177377581596375, "signal/advantage_pre_scale_abs_mean": 0.08177377581596375, "signal/advantage_pre_scale_std": 0.1430963695049286, "signal/advantage_std": 0.1430963695049286, "signal/brier_reward/centered_abs_mean": 0.12979107797145845, "signal/brier_reward/group_std_mean": 0.16742580831050874, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012979108095169067, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012979108095169067, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.031050733104348183, "signal/confidence_uniqueness_reward/group_std_mean": 0.046174564957618715, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00310507332906127, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00310507332906127, "signal/format_reward/centered_abs_mean": 0.008848741184920073, "signal/format_reward/group_std_mean": 0.01954154595732689, "signal/format_reward/group_zero_std_frac": 0.9055555582046508, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004424370592460036, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004424370592460036, "signal/frontier_coverage_0/centered_abs_mean": 0.13977960646152496, "signal/frontier_coverage_0/group_std_mean": 0.19083205461502076, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001998848305083811, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001998848305083811, "signal/frontier_coverage_1/centered_abs_mean": 0.13977960646152496, "signal/frontier_coverage_1/group_std_mean": 0.19083205461502076, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001998848305083811, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001998848305083811, "signal/frontier_coverage_10/centered_abs_mean": 0.13977960646152496, "signal/frontier_coverage_10/group_std_mean": 0.19083205461502076, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001998848305083811, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001998848305083811, "signal/frontier_coverage_15/centered_abs_mean": 0.13977960646152496, "signal/frontier_coverage_15/group_std_mean": 0.19083205461502076, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001998848305083811, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001998848305083811, "signal/frontier_coverage_20/centered_abs_mean": 0.13977960646152496, "signal/frontier_coverage_20/group_std_mean": 0.19083205461502076, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001998848305083811, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001998848305083811, "signal/frontier_coverage_25/centered_abs_mean": 0.13977960646152496, "signal/frontier_coverage_25/group_std_mean": 0.19083205461502076, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001998848305083811, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001998848305083811, "signal/frontier_coverage_5/centered_abs_mean": 0.13977960646152496, "signal/frontier_coverage_5/group_std_mean": 0.19083205461502076, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001998848305083811, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001998848305083811, "step": 115 }, { "calibration/aurc": 0.2399620098631861, "calibration/batch_distribution_entropy": 0.9100376139400339, "calibration/buffer_distribution_entropy": 0.8002528654915221, "calibration/confidence_entropy": 0.45853235863182074, "calibration/coverage@0%": 0.08355148342059338, "calibration/coverage@1%": 0.08355148342059338, "calibration/coverage@10%": 0.3862074607329843, "calibration/coverage@15%": 0.47245582460732977, "calibration/coverage@20%": 0.5226030759162303, "calibration/coverage@25%": 0.558074280104712, "calibration/coverage@30%": 0.6717141143106458, "calibration/coverage@5%": 0.21099476439790577, "calibration/ece": 0.16576287831460218, "calibration/mean_confidence": 0.6249067958530972, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006510416666666674, "completions/max_length": 3755.8, "completions/max_terminated_length": 3755.8, "completions/mean_length": 715.130029296875, "completions/mean_terminated_length": 719.80126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 202.6, "epoch": 0.28799640004499943, "grad_norm": 0.00040327879833057523, "learning_rate": 2.6506024096385547e-06, "loss": -0.0054, "num_tokens": 255679001.0, "reward": 1.0224328994750977, "reward_std": 0.10297303348779678, "rewards/accuracy_reward": 0.6940972328186035, "rewards/brier_reward": 0.8147106170654297, "rewards/confidence_uniqueness_reward": 0.9446017503738403, "rewards/format_reward": 0.9934895753860473, "rewards/frontier_coverage_0": 0.027055247500538827, "rewards/frontier_coverage_1": 0.027055247500538827, "rewards/frontier_coverage_10": 0.027055247500538827, "rewards/frontier_coverage_15": 0.027055247500538827, "rewards/frontier_coverage_20": 0.027055247500538827, "rewards/frontier_coverage_25": 0.027055247500538827, "rewards/frontier_coverage_5": 0.027055247500538827, "signal/accuracy_reward/centered_abs_mean": 0.14014756679534912, "signal/accuracy_reward/group_std_mean": 0.18854482769966124, "signal/accuracy_reward/group_zero_std_frac": 0.4555555522441864, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07007378339767456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07007378339767456, "signal/advantage_abs_mean": 0.07467132210731506, "signal/advantage_pre_scale_abs_mean": 0.07467132210731506, "signal/advantage_pre_scale_std": 0.13559393733739852, "signal/advantage_std": 0.13559393733739852, "signal/brier_reward/centered_abs_mean": 0.12669853121042252, "signal/brier_reward/group_std_mean": 0.16605999767780305, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012669852934777737, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012669852934777737, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03032144792377949, "signal/confidence_uniqueness_reward/group_std_mean": 0.04388071969151497, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030321448110044004, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030321448110044004, "signal/format_reward/centered_abs_mean": 0.010639105830341578, "signal/format_reward/group_std_mean": 0.01954000908881426, "signal/format_reward/group_zero_std_frac": 0.919444465637207, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005319552915170789, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005319552915170789, "signal/frontier_coverage_0/centered_abs_mean": 0.1568516492843628, "signal/frontier_coverage_0/group_std_mean": 0.21309016048908233, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022429785691201686, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022429785691201686, "signal/frontier_coverage_1/centered_abs_mean": 0.1568516492843628, "signal/frontier_coverage_1/group_std_mean": 0.21309016048908233, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022429785691201686, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022429785691201686, "signal/frontier_coverage_10/centered_abs_mean": 0.1568516492843628, "signal/frontier_coverage_10/group_std_mean": 0.21309016048908233, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022429785691201686, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022429785691201686, "signal/frontier_coverage_15/centered_abs_mean": 0.1568516492843628, "signal/frontier_coverage_15/group_std_mean": 0.21309016048908233, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022429785691201686, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022429785691201686, "signal/frontier_coverage_20/centered_abs_mean": 0.1568516492843628, "signal/frontier_coverage_20/group_std_mean": 0.21309016048908233, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022429785691201686, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022429785691201686, "signal/frontier_coverage_25/centered_abs_mean": 0.1568516492843628, "signal/frontier_coverage_25/group_std_mean": 0.21309016048908233, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022429785691201686, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022429785691201686, "signal/frontier_coverage_5/centered_abs_mean": 0.1568516492843628, "signal/frontier_coverage_5/group_std_mean": 0.21309016048908233, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022429785691201686, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022429785691201686, "step": 120 }, { "calibration/aurc": 0.149045402492001, "calibration/batch_distribution_entropy": 0.7272813670961927, "calibration/buffer_distribution_entropy": 0.8062674007120527, "calibration/confidence_entropy": 0.3572362759071208, "calibration/coverage@0%": 0.033903769841269846, "calibration/coverage@1%": 0.033903769841269846, "calibration/coverage@10%": 0.38658952919723155, "calibration/coverage@15%": 0.5611592033099867, "calibration/coverage@20%": 0.7252933019741113, "calibration/coverage@25%": 0.8832031735670769, "calibration/coverage@30%": 0.9783813909956208, "calibration/coverage@5%": 0.08807043650793651, "calibration/ece": 0.12568334858621172, "calibration/mean_confidence": 0.7540773271048653, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0043402777777777676, "completions/max_length": 3024.4, "completions/max_terminated_length": 3024.4, "completions/mean_length": 711.7556518554687, "completions/mean_terminated_length": 714.9013427734375, "completions/min_length": 0.0, "completions/min_terminated_length": 211.0, "epoch": 0.2999962500468744, "grad_norm": 0.0005117288092151284, "learning_rate": 2.5e-06, "loss": -0.005, "num_tokens": 266996090.0, "reward": 1.029345703125, "reward_std": 0.109001125395298, "rewards/accuracy_reward": 0.7084201455116272, "rewards/brier_reward": 0.823661994934082, "rewards/confidence_uniqueness_reward": 0.9209244966506958, "rewards/format_reward": 0.9956597208976745, "rewards/frontier_coverage_0": 0.02844259552657604, "rewards/frontier_coverage_1": 0.02844259552657604, "rewards/frontier_coverage_10": 0.02844259552657604, "rewards/frontier_coverage_15": 0.02844259552657604, "rewards/frontier_coverage_20": 0.02844259552657604, "rewards/frontier_coverage_25": 0.02844259552657604, "rewards/frontier_coverage_5": 0.02844259552657604, "signal/accuracy_reward/centered_abs_mean": 0.14586045742034912, "signal/accuracy_reward/group_std_mean": 0.19265780448913575, "signal/accuracy_reward/group_zero_std_frac": 0.4555555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07293022871017456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07293022871017456, "signal/advantage_abs_mean": 0.08043320327997208, "signal/advantage_pre_scale_abs_mean": 0.08043320327997208, "signal/advantage_pre_scale_std": 0.14412462413311006, "signal/advantage_std": 0.14412462413311006, "signal/brier_reward/centered_abs_mean": 0.12533471435308458, "signal/brier_reward/group_std_mean": 0.166182804107666, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01253347136080265, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01253347136080265, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04015061669051647, "signal/confidence_uniqueness_reward/group_std_mean": 0.05653024539351463, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004015061818063259, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004015061818063259, "signal/format_reward/centered_abs_mean": 0.007953558850567788, "signal/format_reward/group_std_mean": 0.01796704400330782, "signal/format_reward/group_zero_std_frac": 0.9138888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003976779425283894, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003976779425283894, "signal/frontier_coverage_0/centered_abs_mean": 0.11626611351966858, "signal/frontier_coverage_0/group_std_mean": 0.16440567672252654, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016626053722575307, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016626053722575307, "signal/frontier_coverage_1/centered_abs_mean": 0.11626611351966858, "signal/frontier_coverage_1/group_std_mean": 0.16440567672252654, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016626053722575307, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016626053722575307, "signal/frontier_coverage_10/centered_abs_mean": 0.11626611351966858, "signal/frontier_coverage_10/group_std_mean": 0.16440567672252654, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016626053722575307, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016626053722575307, "signal/frontier_coverage_15/centered_abs_mean": 0.11626611351966858, "signal/frontier_coverage_15/group_std_mean": 0.16440567672252654, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016626053722575307, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016626053722575307, "signal/frontier_coverage_20/centered_abs_mean": 0.11626611351966858, "signal/frontier_coverage_20/group_std_mean": 0.16440567672252654, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016626053722575307, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016626053722575307, "signal/frontier_coverage_25/centered_abs_mean": 0.11626611351966858, "signal/frontier_coverage_25/group_std_mean": 0.16440567672252654, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016626053722575307, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016626053722575307, "signal/frontier_coverage_5/centered_abs_mean": 0.11626611351966858, "signal/frontier_coverage_5/group_std_mean": 0.16440567672252654, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016626053722575307, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016626053722575307, "step": 125 }, { "calibration/aurc": 0.2498398374245417, "calibration/batch_distribution_entropy": 0.5941785632997885, "calibration/buffer_distribution_entropy": 0.8031562502076233, "calibration/confidence_entropy": 0.2781564688238765, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.06931798188510341, "calibration/coverage@15%": 0.21769957293228143, "calibration/coverage@20%": 0.41865692715334324, "calibration/coverage@25%": 0.576949097598052, "calibration/coverage@30%": 0.82877011358758, "calibration/coverage@5%": 0.0, "calibration/ece": 0.21146582076672024, "calibration/mean_confidence": 0.8089417351778607, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011979166666666674, "completions/max_length": 3381.8, "completions/max_terminated_length": 3381.8, "completions/mean_length": 746.9890747070312, "completions/mean_terminated_length": 756.1432006835937, "completions/min_length": 0.0, "completions/min_terminated_length": 188.8, "epoch": 0.3119961000487494, "grad_norm": 0.0005842032842338085, "learning_rate": 2.349397590361446e-06, "loss": -0.0114, "num_tokens": 278726204.0, "reward": 0.9973063111305237, "reward_std": 0.13256770521402358, "rewards/accuracy_reward": 0.6662326455116272, "rewards/brier_reward": 0.7879875302314758, "rewards/confidence_uniqueness_reward": 0.8794443368911743, "rewards/format_reward": 0.9878472089767456, "rewards/frontier_coverage_0": 0.03522439245134592, "rewards/frontier_coverage_1": 0.03522439245134592, "rewards/frontier_coverage_10": 0.03522439245134592, "rewards/frontier_coverage_15": 0.03522439245134592, "rewards/frontier_coverage_20": 0.03522439245134592, "rewards/frontier_coverage_25": 0.03502925205975771, "rewards/frontier_coverage_5": 0.03522439245134592, "signal/accuracy_reward/centered_abs_mean": 0.1634385883808136, "signal/accuracy_reward/group_std_mean": 0.21186422407627106, "signal/accuracy_reward/group_zero_std_frac": 0.4138888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0817192941904068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0817192941904068, "signal/advantage_abs_mean": 0.09865063428878784, "signal/advantage_pre_scale_abs_mean": 0.09865063428878784, "signal/advantage_pre_scale_std": 0.17027110159397124, "signal/advantage_std": 0.17027110159397124, "signal/brier_reward/centered_abs_mean": 0.1411220982670784, "signal/brier_reward/group_std_mean": 0.18561237156391144, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01411221083253622, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01411221083253622, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06204545646905899, "signal/confidence_uniqueness_reward/group_std_mean": 0.08459463864564895, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006204545777291059, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006204545777291059, "signal/format_reward/centered_abs_mean": 0.01944444477558136, "signal/format_reward/group_std_mean": 0.03500307872891426, "signal/format_reward/group_zero_std_frac": 0.8583333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00972222238779068, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00972222238779068, "signal/frontier_coverage_0/centered_abs_mean": 0.08938175737857819, "signal/frontier_coverage_0/group_std_mean": 0.13144375383853912, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012781591154634952, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012781591154634952, "signal/frontier_coverage_1/centered_abs_mean": 0.08938175737857819, "signal/frontier_coverage_1/group_std_mean": 0.13144375383853912, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012781591154634952, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012781591154634952, "signal/frontier_coverage_10/centered_abs_mean": 0.08938175737857819, "signal/frontier_coverage_10/group_std_mean": 0.13144375383853912, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012781591154634952, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012781591154634952, "signal/frontier_coverage_15/centered_abs_mean": 0.08938175737857819, "signal/frontier_coverage_15/group_std_mean": 0.13144375383853912, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012781591154634952, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012781591154634952, "signal/frontier_coverage_20/centered_abs_mean": 0.08938175737857819, "signal/frontier_coverage_20/group_std_mean": 0.13144375383853912, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012781591154634952, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012781591154634952, "signal/frontier_coverage_25/centered_abs_mean": 0.08872648626565934, "signal/frontier_coverage_25/group_std_mean": 0.13051423877477647, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012687887530773879, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012687887530773879, "signal/frontier_coverage_5/centered_abs_mean": 0.08938175737857819, "signal/frontier_coverage_5/group_std_mean": 0.13144375383853912, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012781591154634952, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012781591154634952, "step": 130 }, { "calibration/aurc": 0.20665346427154546, "calibration/batch_distribution_entropy": 0.5376576985579862, "calibration/buffer_distribution_entropy": 0.7984151845186054, "calibration/confidence_entropy": 0.27916214521537636, "calibration/coverage@0%": 0.06096666666666667, "calibration/coverage@1%": 0.07971666666666667, "calibration/coverage@10%": 0.2754303664921466, "calibration/coverage@15%": 0.4312977312390925, "calibration/coverage@20%": 0.4989975567190227, "calibration/coverage@25%": 0.6582754683107096, "calibration/coverage@30%": 0.7717496206580728, "calibration/coverage@5%": 0.2133375, "calibration/ece": 0.17591854873155563, "calibration/mean_confidence": 0.8414116772058632, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007986111111111093, "completions/max_length": 3697.2, "completions/max_terminated_length": 3697.2, "completions/mean_length": 741.0501708984375, "completions/mean_terminated_length": 747.028369140625, "completions/min_length": 0.0, "completions/min_terminated_length": 203.0, "epoch": 0.32399595005062437, "grad_norm": 0.0005643125041387975, "learning_rate": 2.1987951807228917e-06, "loss": -0.0065, "num_tokens": 290356126.0, "reward": 1.0172087907791139, "reward_std": 0.11705570220947266, "rewards/accuracy_reward": 0.7008680462837219, "rewards/brier_reward": 0.8044859409332276, "rewards/confidence_uniqueness_reward": 0.8768606543540954, "rewards/format_reward": 0.9919270873069763, "rewards/frontier_coverage_0": 0.026983942463994028, "rewards/frontier_coverage_1": 0.026983942463994028, "rewards/frontier_coverage_10": 0.026983942463994028, "rewards/frontier_coverage_15": 0.026983942463994028, "rewards/frontier_coverage_20": 0.026983942463994028, "rewards/frontier_coverage_25": 0.025268368422985077, "rewards/frontier_coverage_5": 0.026983942463994028, "signal/accuracy_reward/centered_abs_mean": 0.1438259571790695, "signal/accuracy_reward/group_std_mean": 0.19028924107551576, "signal/accuracy_reward/group_zero_std_frac": 0.4638888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07191297858953476, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07191297858953476, "signal/advantage_abs_mean": 0.087016960978508, "signal/advantage_pre_scale_abs_mean": 0.087016960978508, "signal/advantage_pre_scale_std": 0.15853114426136017, "signal/advantage_std": 0.15853114426136017, "signal/brier_reward/centered_abs_mean": 0.1297492727637291, "signal/brier_reward/group_std_mean": 0.17122452557086945, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012974927946925164, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012974927946925164, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05996449142694473, "signal/confidence_uniqueness_reward/group_std_mean": 0.07999310791492462, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0059964492917060856, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0059964492917060856, "signal/format_reward/centered_abs_mean": 0.01330837681889534, "signal/format_reward/group_std_mean": 0.02422129511833191, "signal/format_reward/group_zero_std_frac": 0.9000000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00665418840944767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00665418840944767, "signal/frontier_coverage_0/centered_abs_mean": 0.07249849885702134, "signal/frontier_coverage_0/group_std_mean": 0.11098081171512604, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0010367285343818367, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0010367285343818367, "signal/frontier_coverage_1/centered_abs_mean": 0.07249849885702134, "signal/frontier_coverage_1/group_std_mean": 0.11098081171512604, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0010367285343818367, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0010367285343818367, "signal/frontier_coverage_10/centered_abs_mean": 0.07249849885702134, "signal/frontier_coverage_10/group_std_mean": 0.11098081171512604, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0010367285343818367, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010367285343818367, "signal/frontier_coverage_15/centered_abs_mean": 0.07249849885702134, "signal/frontier_coverage_15/group_std_mean": 0.11098081171512604, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010367285343818367, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010367285343818367, "signal/frontier_coverage_20/centered_abs_mean": 0.07249849885702134, "signal/frontier_coverage_20/group_std_mean": 0.11098081171512604, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010367285343818367, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010367285343818367, "signal/frontier_coverage_25/centered_abs_mean": 0.06090174987912178, "signal/frontier_coverage_25/group_std_mean": 0.09414784163236618, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008708950132131577, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008708950132131577, "signal/frontier_coverage_5/centered_abs_mean": 0.07249849885702134, "signal/frontier_coverage_5/group_std_mean": 0.11098081171512604, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0010367285343818367, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0010367285343818367, "step": 135 }, { "calibration/aurc": 0.13807296148494613, "calibration/batch_distribution_entropy": 0.5912922286361493, "calibration/buffer_distribution_entropy": 0.7990959992248101, "calibration/confidence_entropy": 0.3053410751404527, "calibration/coverage@0%": 0.018230563002680965, "calibration/coverage@1%": 0.018230563002680965, "calibration/coverage@10%": 0.47200026952869323, "calibration/coverage@15%": 0.6143298179119846, "calibration/coverage@20%": 0.7965660023121732, "calibration/coverage@25%": 0.9006813786793784, "calibration/coverage@30%": 0.9573407557430865, "calibration/coverage@5%": 0.19797422633505762, "calibration/ece": 0.15729955684106034, "calibration/mean_confidence": 0.827820182834459, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011197916666666674, "completions/max_length": 3481.4, "completions/max_terminated_length": 3481.4, "completions/mean_length": 742.7499145507812, "completions/mean_terminated_length": 751.1475830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 217.0, "epoch": 0.33599580005249935, "grad_norm": 0.0004445806553121656, "learning_rate": 2.0481927710843377e-06, "loss": -0.0094, "num_tokens": 302016829.0, "reward": 1.0130672454833984, "reward_std": 0.1154438465833664, "rewards/accuracy_reward": 0.6948784708976745, "rewards/brier_reward": 0.8031974673271179, "rewards/confidence_uniqueness_reward": 0.883224892616272, "rewards/format_reward": 0.9887152791023255, "rewards/frontier_coverage_0": 0.026858755480498077, "rewards/frontier_coverage_1": 0.026858755480498077, "rewards/frontier_coverage_10": 0.026858755480498077, "rewards/frontier_coverage_15": 0.026858755480498077, "rewards/frontier_coverage_20": 0.026858755480498077, "rewards/frontier_coverage_25": 0.02263545459136367, "rewards/frontier_coverage_5": 0.026858755480498077, "signal/accuracy_reward/centered_abs_mean": 0.12689344882965087, "signal/accuracy_reward/group_std_mean": 0.17651031613349916, "signal/accuracy_reward/group_zero_std_frac": 0.4694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06344672441482543, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06344672441482543, "signal/advantage_abs_mean": 0.08137730211019516, "signal/advantage_pre_scale_abs_mean": 0.08137730211019516, "signal/advantage_pre_scale_std": 0.1559952199459076, "signal/advantage_std": 0.1559952199459076, "signal/brier_reward/centered_abs_mean": 0.12170794308185577, "signal/brier_reward/group_std_mean": 0.16413157284259797, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0121707946062088, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0121707946062088, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05902692675590515, "signal/confidence_uniqueness_reward/group_std_mean": 0.08175166472792625, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005902692675590515, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005902692675590515, "signal/format_reward/centered_abs_mean": 0.016840277798473836, "signal/format_reward/group_std_mean": 0.03128794245421886, "signal/format_reward/group_zero_std_frac": 0.8750000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008420138899236918, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008420138899236918, "signal/frontier_coverage_0/centered_abs_mean": 0.06246669292449951, "signal/frontier_coverage_0/group_std_mean": 0.09620369970798492, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0008932736935093998, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0008932736935093998, "signal/frontier_coverage_1/centered_abs_mean": 0.06246669292449951, "signal/frontier_coverage_1/group_std_mean": 0.09620369970798492, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0008932736935093998, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0008932736935093998, "signal/frontier_coverage_10/centered_abs_mean": 0.06246669292449951, "signal/frontier_coverage_10/group_std_mean": 0.09620369970798492, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0008932736935093998, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008932736935093998, "signal/frontier_coverage_15/centered_abs_mean": 0.06246669292449951, "signal/frontier_coverage_15/group_std_mean": 0.09620369970798492, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008932736935093998, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008932736935093998, "signal/frontier_coverage_20/centered_abs_mean": 0.06246669292449951, "signal/frontier_coverage_20/group_std_mean": 0.09620369970798492, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008932736935093998, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008932736935093998, "signal/frontier_coverage_25/centered_abs_mean": 0.045351064205169676, "signal/frontier_coverage_25/group_std_mean": 0.07125220820307732, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006485202291514724, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006485202291514724, "signal/frontier_coverage_5/centered_abs_mean": 0.06246669292449951, "signal/frontier_coverage_5/group_std_mean": 0.09620369970798492, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0008932736935093998, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0008932736935093998, "step": 140 }, { "calibration/aurc": 0.15873093506050334, "calibration/batch_distribution_entropy": 0.7353718083783116, "calibration/buffer_distribution_entropy": 0.8072198353385431, "calibration/confidence_entropy": 0.35873181163031825, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.06219839142091153, "calibration/coverage@10%": 0.39977334249221996, "calibration/coverage@15%": 0.5484208791798558, "calibration/coverage@20%": 0.7012722080481055, "calibration/coverage@25%": 0.7817749343832021, "calibration/coverage@30%": 0.8312116584385285, "calibration/coverage@5%": 0.2308831483301228, "calibration/ece": 0.16048376103259138, "calibration/mean_confidence": 0.7810990624058293, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011197916666666651, "completions/max_length": 3506.4, "completions/max_terminated_length": 3506.4, "completions/mean_length": 735.0325561523438, "completions/mean_terminated_length": 743.3595581054688, "completions/min_length": 0.0, "completions/min_terminated_length": 229.2, "epoch": 0.34799565005437433, "grad_norm": 0.0003804602602031082, "learning_rate": 1.8975903614457832e-06, "loss": -0.0092, "num_tokens": 313549012.0, "reward": 1.0284262537956237, "reward_std": 0.11307135671377182, "rewards/accuracy_reward": 0.7163194417953491, "rewards/brier_reward": 0.8276101469993591, "rewards/confidence_uniqueness_reward": 0.9076523542404175, "rewards/format_reward": 0.9887152791023255, "rewards/frontier_coverage_0": 0.02373262830078602, "rewards/frontier_coverage_1": 0.02373262830078602, "rewards/frontier_coverage_10": 0.02373262830078602, "rewards/frontier_coverage_15": 0.02373262830078602, "rewards/frontier_coverage_20": 0.023653368651866912, "rewards/frontier_coverage_25": 0.024303621798753738, "rewards/frontier_coverage_5": 0.02373262830078602, "signal/accuracy_reward/centered_abs_mean": 0.13071831464767455, "signal/accuracy_reward/group_std_mean": 0.17358810007572173, "signal/accuracy_reward/group_zero_std_frac": 0.49444445967674255, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06535915732383728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06535915732383728, "signal/advantage_abs_mean": 0.08121174573898315, "signal/advantage_pre_scale_abs_mean": 0.08121174573898315, "signal/advantage_pre_scale_std": 0.15750052332878112, "signal/advantage_std": 0.15750052332878112, "signal/brier_reward/centered_abs_mean": 0.11548198312520981, "signal/brier_reward/group_std_mean": 0.1539760112762451, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011548198573291302, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011548198573291302, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0547367163002491, "signal/confidence_uniqueness_reward/group_std_mean": 0.07798558920621872, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005473671574145555, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005473671574145555, "signal/format_reward/centered_abs_mean": 0.019759114272892474, "signal/format_reward/group_std_mean": 0.0363234143704176, "signal/format_reward/group_zero_std_frac": 0.8555555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009879557136446237, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009879557136446237, "signal/frontier_coverage_0/centered_abs_mean": 0.07713357806205749, "signal/frontier_coverage_0/group_std_mean": 0.10995967090129852, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0011030101682990789, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0011030101682990789, "signal/frontier_coverage_1/centered_abs_mean": 0.07713357806205749, "signal/frontier_coverage_1/group_std_mean": 0.10995967090129852, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0011030101682990789, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0011030101682990789, "signal/frontier_coverage_10/centered_abs_mean": 0.07713357806205749, "signal/frontier_coverage_10/group_std_mean": 0.10995967090129852, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0011030101682990789, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011030101682990789, "signal/frontier_coverage_15/centered_abs_mean": 0.07713357806205749, "signal/frontier_coverage_15/group_std_mean": 0.10995967090129852, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011030101682990789, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011030101682990789, "signal/frontier_coverage_20/centered_abs_mean": 0.06395274251699448, "signal/frontier_coverage_20/group_std_mean": 0.09226414263248443, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009145242162048817, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009145242162048817, "signal/frontier_coverage_25/centered_abs_mean": 0.04583085030317306, "signal/frontier_coverage_25/group_std_mean": 0.06589328721165658, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006553811486810446, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006553811486810446, "signal/frontier_coverage_5/centered_abs_mean": 0.07713357806205749, "signal/frontier_coverage_5/group_std_mean": 0.10995967090129852, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0011030101682990789, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0011030101682990789, "step": 145 }, { "calibration/aurc": 0.19848453530632176, "calibration/batch_distribution_entropy": 0.8353200249126764, "calibration/buffer_distribution_entropy": 0.8180778954803165, "calibration/confidence_entropy": 0.4378518057416948, "calibration/coverage@0%": 0.014285714285714285, "calibration/coverage@1%": 0.014285714285714285, "calibration/coverage@10%": 0.3476190476190476, "calibration/coverage@15%": 0.39187699448796054, "calibration/coverage@20%": 0.5605820490577851, "calibration/coverage@25%": 0.6765151459405365, "calibration/coverage@30%": 0.7895631984795912, "calibration/coverage@5%": 0.16589781746031745, "calibration/ece": 0.14407072392639744, "calibration/mean_confidence": 0.7014389534257306, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007986111111111116, "completions/max_length": 3675.6, "completions/max_terminated_length": 3675.6, "completions/mean_length": 805.659130859375, "completions/mean_terminated_length": 812.1679321289063, "completions/min_length": 0.0, "completions/min_terminated_length": 217.2, "epoch": 0.3599955000562493, "grad_norm": 0.0004782957839779556, "learning_rate": 1.7469879518072292e-06, "loss": -0.0064, "num_tokens": 325940541.0, "reward": 1.022280216217041, "reward_std": 0.11486477702856064, "rewards/accuracy_reward": 0.69921875, "rewards/brier_reward": 0.8172156095504761, "rewards/confidence_uniqueness_reward": 0.9295577049255371, "rewards/format_reward": 0.9919270873069763, "rewards/frontier_coverage_0": 0.018185996543616058, "rewards/frontier_coverage_1": 0.018185996543616058, "rewards/frontier_coverage_10": 0.018185996543616058, "rewards/frontier_coverage_15": 0.019136503525078296, "rewards/frontier_coverage_20": 0.02075750511139631, "rewards/frontier_coverage_25": 0.02931526005268097, "rewards/frontier_coverage_5": 0.018185996543616058, "signal/accuracy_reward/centered_abs_mean": 0.14734700322151184, "signal/accuracy_reward/group_std_mean": 0.19579098820686341, "signal/accuracy_reward/group_zero_std_frac": 0.43611112236976624, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07367350161075592, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07367350161075592, "signal/advantage_abs_mean": 0.08442184329032898, "signal/advantage_pre_scale_abs_mean": 0.08442184329032898, "signal/advantage_pre_scale_std": 0.15249321460723878, "signal/advantage_std": 0.15249321460723878, "signal/brier_reward/centered_abs_mean": 0.11737381666898727, "signal/brier_reward/group_std_mean": 0.15464959442615508, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011737381666898727, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011737381666898727, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04265345185995102, "signal/confidence_uniqueness_reward/group_std_mean": 0.05998752787709236, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004265345307067037, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004265345307067037, "signal/format_reward/centered_abs_mean": 0.01388346366584301, "signal/format_reward/group_std_mean": 0.02453953940421343, "signal/format_reward/group_zero_std_frac": 0.9055555582046508, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006941731832921505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006941731832921505, "signal/frontier_coverage_0/centered_abs_mean": 0.10197426676750183, "signal/frontier_coverage_0/group_std_mean": 0.14345729649066924, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001458232058212161, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001458232058212161, "signal/frontier_coverage_1/centered_abs_mean": 0.10197426676750183, "signal/frontier_coverage_1/group_std_mean": 0.14345729649066924, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001458232058212161, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001458232058212161, "signal/frontier_coverage_10/centered_abs_mean": 0.10197426676750183, "signal/frontier_coverage_10/group_std_mean": 0.14345729649066924, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001458232058212161, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001458232058212161, "signal/frontier_coverage_15/centered_abs_mean": 0.0956965520977974, "signal/frontier_coverage_15/group_std_mean": 0.1353534460067749, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001368460664525628, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001368460664525628, "signal/frontier_coverage_20/centered_abs_mean": 0.06615661978721618, "signal/frontier_coverage_20/group_std_mean": 0.09637952744960784, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009460396599024534, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009460396599024534, "signal/frontier_coverage_25/centered_abs_mean": 0.04605967253446579, "signal/frontier_coverage_25/group_std_mean": 0.06603193655610085, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006586533272638917, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006586533272638917, "signal/frontier_coverage_5/centered_abs_mean": 0.10197426676750183, "signal/frontier_coverage_5/group_std_mean": 0.14345729649066924, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001458232058212161, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001458232058212161, "step": 150 }, { "epoch": 0.3599955000562493, "eval_calibration/aurc": 0.14445637044652185, "eval_calibration/batch_distribution_entropy": 0.8076510952162764, "eval_calibration/buffer_distribution_entropy": 0.8268265736598052, "eval_calibration/confidence_entropy": 0.4869395872369577, "eval_calibration/coverage@0%": 0.1986111111111111, "eval_calibration/coverage@1%": 0.1986111111111111, "eval_calibration/coverage@10%": 0.4069444444444444, "eval_calibration/coverage@15%": 0.49548611111111107, "eval_calibration/coverage@20%": 0.8232638888888889, "eval_calibration/coverage@25%": 0.9253472222222223, "eval_calibration/coverage@30%": 0.9722222222222223, "eval_calibration/coverage@5%": 0.2767361111111111, "eval_calibration/ece": 0.16583333333333333, "eval_calibration/mean_confidence": 0.7061388888888889, "eval_completions/clipped_ratio": 0.013888888888888876, "eval_completions/max_length": 2670.0, "eval_completions/max_terminated_length": 2670.0, "eval_completions/mean_length": 768.7396036783854, "eval_completions/mean_terminated_length": 779.64697265625, "eval_completions/min_length": 55.166666666666664, "eval_completions/min_terminated_length": 256.3333333333333, "eval_loss": 0.0, "eval_num_tokens": 325940541.0, "eval_reward": 1.0079765021800995, "eval_reward_std": 0.2547302494446437, "eval_rewards/accuracy_reward": 0.6840277711550394, "eval_rewards/brier_reward": 0.8177569707234701, "eval_rewards/confidence_uniqueness_reward": 0.8829086720943451, "eval_rewards/format_reward": 0.9861111044883728, "eval_rewards/frontier_coverage_0": 0.02693510102108121, "eval_rewards/frontier_coverage_1": 0.02693510102108121, "eval_rewards/frontier_coverage_10": 0.02693510102108121, "eval_rewards/frontier_coverage_15": 0.027094673210134108, "eval_rewards/frontier_coverage_20": 0.028464287829895813, "eval_rewards/frontier_coverage_25": 0.03533406959225734, "eval_rewards/frontier_coverage_5": 0.02693510102108121, "eval_runtime": 196.1658, "eval_samples_per_second": 5.098, "eval_signal/accuracy_reward/centered_abs_mean": 0.4182942708333333, "eval_signal/accuracy_reward/group_std_mean": 0.4631828914086024, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20914713541666666, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20914713541666666, "eval_signal/advantage_abs_mean": 0.2190506507953008, "eval_signal/advantage_pre_scale_abs_mean": 0.2190506507953008, "eval_signal/advantage_pre_scale_std": 0.25378915170828503, "eval_signal/advantage_std": 0.25378915170828503, "eval_signal/brier_reward/centered_abs_mean": 0.17790956050157547, "eval_signal/brier_reward/group_std_mean": 0.23551630725463232, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017790956267466147, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.017790956267466147, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05993440312643846, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1008106352140506, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005993440669650833, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005993440669650833, "eval_signal/format_reward/centered_abs_mean": 0.026692708022892475, "eval_signal/format_reward/group_std_mean": 0.07258860394358635, "eval_signal/format_reward/group_zero_std_frac": 0.6111111243565878, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013346354011446238, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.013346354011446238, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.16356521099805832, "eval_signal/frontier_coverage_0/group_std_mean": 0.2565693234403928, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002338982536457479, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002338982536457479, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.16356521099805832, "eval_signal/frontier_coverage_1/group_std_mean": 0.2565693234403928, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002338982536457479, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002338982536457479, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.16356521099805832, "eval_signal/frontier_coverage_10/group_std_mean": 0.2565693234403928, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002338982536457479, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002338982536457479, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.14471895496050516, "eval_signal/frontier_coverage_15/group_std_mean": 0.232371523976326, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002069481047025571, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002069481047025571, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.09058589860796928, "eval_signal/frontier_coverage_20/group_std_mean": 0.1578061431646347, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012953783734701574, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012953783734701574, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.05932698274652163, "eval_signal/frontier_coverage_25/group_std_mean": 0.09397069240609805, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008483757652963201, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008483757652963201, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.16356521099805832, "eval_signal/frontier_coverage_5/group_std_mean": 0.2565693234403928, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002338982536457479, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002338982536457479, "eval_steps_per_second": 0.031, "step": 150 }, { "calibration/aurc": 0.13392662799561467, "calibration/batch_distribution_entropy": 0.8691829933423222, "calibration/buffer_distribution_entropy": 0.831859476416979, "calibration/confidence_entropy": 0.48191132883935534, "calibration/coverage@0%": 0.018617021276595744, "calibration/coverage@1%": 0.07606382978723404, "calibration/coverage@10%": 0.4882390495713171, "calibration/coverage@15%": 0.6851489520838969, "calibration/coverage@20%": 0.7909027950912767, "calibration/coverage@25%": 0.8724092080113021, "calibration/coverage@30%": 0.9416853652455746, "calibration/coverage@5%": 0.17503898852623373, "calibration/ece": 0.1028043887841402, "calibration/mean_confidence": 0.693157207237826, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007812500000000023, "completions/max_length": 3306.4, "completions/max_terminated_length": 3306.4, "completions/mean_length": 744.222314453125, "completions/mean_terminated_length": 750.1518798828125, "completions/min_length": 0.0, "completions/min_terminated_length": 228.2, "epoch": 0.3719953500581243, "grad_norm": 0.0004585942951962352, "learning_rate": 1.5963855421686747e-06, "loss": -0.0064, "num_tokens": 337621694.0, "reward": 1.0490242481231689, "reward_std": 0.10810918956995011, "rewards/accuracy_reward": 0.7472222328186036, "rewards/brier_reward": 0.8391157627105713, "rewards/confidence_uniqueness_reward": 0.9409691214561462, "rewards/format_reward": 0.9921006798744202, "rewards/frontier_coverage_0": 0.005688181053847075, "rewards/frontier_coverage_1": 0.005688181053847075, "rewards/frontier_coverage_10": 0.005688181053847075, "rewards/frontier_coverage_15": 0.010548211727291345, "rewards/frontier_coverage_20": 0.018851665779948235, "rewards/frontier_coverage_25": 0.04255228638648987, "rewards/frontier_coverage_5": 0.005688181053847075, "signal/accuracy_reward/centered_abs_mean": 0.1384982645511627, "signal/accuracy_reward/group_std_mean": 0.18303903341293334, "signal/accuracy_reward/group_zero_std_frac": 0.47777777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06924913227558135, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06924913227558135, "signal/advantage_abs_mean": 0.07813455611467361, "signal/advantage_pre_scale_abs_mean": 0.07813455611467361, "signal/advantage_pre_scale_std": 0.14875202775001525, "signal/advantage_std": 0.14875202775001525, "signal/brier_reward/centered_abs_mean": 0.10786259174346924, "signal/brier_reward/group_std_mean": 0.14233016669750215, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01078625936061144, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01078625936061144, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03370047435164451, "signal/confidence_uniqueness_reward/group_std_mean": 0.05102566778659821, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003370047500357032, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003370047500357032, "signal/format_reward/centered_abs_mean": 0.013829210214316845, "signal/format_reward/group_std_mean": 0.026308896765112878, "signal/format_reward/group_zero_std_frac": 0.8916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006914605107158423, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006914605107158423, "signal/frontier_coverage_0/centered_abs_mean": 0.11292163282632828, "signal/frontier_coverage_0/group_std_mean": 0.15748831629753113, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001614779350347817, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001614779350347817, "signal/frontier_coverage_1/centered_abs_mean": 0.11292163282632828, "signal/frontier_coverage_1/group_std_mean": 0.15748831629753113, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001614779350347817, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001614779350347817, "signal/frontier_coverage_10/centered_abs_mean": 0.11292163282632828, "signal/frontier_coverage_10/group_std_mean": 0.15748831629753113, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001614779350347817, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001614779350347817, "signal/frontier_coverage_15/centered_abs_mean": 0.09441954791545867, "signal/frontier_coverage_15/group_std_mean": 0.13332569301128389, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013501995243132114, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013501995243132114, "signal/frontier_coverage_20/centered_abs_mean": 0.05975788086652756, "signal/frontier_coverage_20/group_std_mean": 0.08709384799003601, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008545376709662378, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008545376709662378, "signal/frontier_coverage_25/centered_abs_mean": 0.04686888232827187, "signal/frontier_coverage_25/group_std_mean": 0.06351146027445793, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006702250568196178, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006702250568196178, "signal/frontier_coverage_5/centered_abs_mean": 0.11292163282632828, "signal/frontier_coverage_5/group_std_mean": 0.15748831629753113, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001614779350347817, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001614779350347817, "step": 155 }, { "calibration/aurc": 0.1388528080720422, "calibration/batch_distribution_entropy": 0.7976930988131808, "calibration/buffer_distribution_entropy": 0.8404623309625426, "calibration/confidence_entropy": 0.440109510855466, "calibration/coverage@0%": 0.030789157940663177, "calibration/coverage@1%": 0.030789157940663177, "calibration/coverage@10%": 0.604611147469459, "calibration/coverage@15%": 0.7060672993019198, "calibration/coverage@20%": 0.7723958333333333, "calibration/coverage@25%": 0.8401467678100264, "calibration/coverage@30%": 0.8591029023746704, "calibration/coverage@5%": 0.2589359729493892, "calibration/ece": 0.12554189453879194, "calibration/mean_confidence": 0.7421448630163099, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010243055555555557, "completions/max_length": 3067.2, "completions/max_terminated_length": 3067.2, "completions/mean_length": 750.00947265625, "completions/mean_terminated_length": 757.798974609375, "completions/min_length": 0.0, "completions/min_terminated_length": 236.6, "epoch": 0.38399520005999926, "grad_norm": 0.0004149152955505997, "learning_rate": 1.4457831325301204e-06, "loss": -0.0077, "num_tokens": 349349099.0, "reward": 1.0139774680137634, "reward_std": 0.10905950218439102, "rewards/accuracy_reward": 0.6820312380790711, "rewards/brier_reward": 0.8137630939483642, "rewards/confidence_uniqueness_reward": 0.9368009090423584, "rewards/format_reward": 0.9897569417953491, "rewards/frontier_coverage_0": 0.025845300406217575, "rewards/frontier_coverage_1": 0.025845300406217575, "rewards/frontier_coverage_10": 0.025845300406217575, "rewards/frontier_coverage_15": 0.025336899049580097, "rewards/frontier_coverage_20": 0.026929372176527976, "rewards/frontier_coverage_25": 0.056025682389736174, "rewards/frontier_coverage_5": 0.025845300406217575, "signal/accuracy_reward/centered_abs_mean": 0.13673502951860428, "signal/accuracy_reward/group_std_mean": 0.17701160609722139, "signal/accuracy_reward/group_zero_std_frac": 0.5055555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06836751475930214, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06836751475930214, "signal/advantage_abs_mean": 0.08092275410890579, "signal/advantage_pre_scale_abs_mean": 0.08092275410890579, "signal/advantage_pre_scale_std": 0.15143148899078368, "signal/advantage_std": 0.15143148899078368, "signal/brier_reward/centered_abs_mean": 0.11717015504837036, "signal/brier_reward/group_std_mean": 0.15168921947479247, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011717015691101552, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011717015691101552, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.036166638135910034, "signal/confidence_uniqueness_reward/group_std_mean": 0.05395522266626358, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036166639067232607, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036166639067232607, "signal/format_reward/centered_abs_mean": 0.0162109375, "signal/format_reward/group_std_mean": 0.030159536749124527, "signal/format_reward/group_zero_std_frac": 0.8777777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00810546875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00810546875, "signal/frontier_coverage_0/centered_abs_mean": 0.11039406508207321, "signal/frontier_coverage_0/group_std_mean": 0.15149362683296203, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00157863509375602, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00157863509375602, "signal/frontier_coverage_1/centered_abs_mean": 0.11039406508207321, "signal/frontier_coverage_1/group_std_mean": 0.15149362683296203, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00157863509375602, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00157863509375602, "signal/frontier_coverage_10/centered_abs_mean": 0.11039406508207321, "signal/frontier_coverage_10/group_std_mean": 0.15149362683296203, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00157863509375602, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00157863509375602, "signal/frontier_coverage_15/centered_abs_mean": 0.08255304098129272, "signal/frontier_coverage_15/group_std_mean": 0.11533389985561371, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011805085465312003, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011805085465312003, "signal/frontier_coverage_20/centered_abs_mean": 0.05247567817568779, "signal/frontier_coverage_20/group_std_mean": 0.0738027811050415, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007504021981731057, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007504021981731057, "signal/frontier_coverage_25/centered_abs_mean": 0.05542582124471664, "signal/frontier_coverage_25/group_std_mean": 0.07121084332466125, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000792589201591909, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000792589201591909, "signal/frontier_coverage_5/centered_abs_mean": 0.11039406508207321, "signal/frontier_coverage_5/group_std_mean": 0.15149362683296203, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00157863509375602, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00157863509375602, "step": 160 }, { "calibration/aurc": 0.13946615973860202, "calibration/batch_distribution_entropy": 0.8478804166176523, "calibration/buffer_distribution_entropy": 0.8458874423426573, "calibration/confidence_entropy": 0.44236236460047246, "calibration/coverage@0%": 0.07055610427831982, "calibration/coverage@1%": 0.07055610427831982, "calibration/coverage@10%": 0.45169935353064955, "calibration/coverage@15%": 0.5726540277894626, "calibration/coverage@20%": 0.7059143774537835, "calibration/coverage@25%": 0.7753862294074275, "calibration/coverage@30%": 0.8993705754320949, "calibration/coverage@5%": 0.32451957543285126, "calibration/ece": 0.09116762894764044, "calibration/mean_confidence": 0.6891000581889718, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008333333333333349, "completions/max_length": 3250.0, "completions/max_terminated_length": 3250.0, "completions/mean_length": 787.2148559570312, "completions/mean_terminated_length": 793.8334838867188, "completions/min_length": 0.0, "completions/min_terminated_length": 230.6, "epoch": 0.39599505006187424, "grad_norm": 0.00041544463601894677, "learning_rate": 1.2951807228915664e-06, "loss": -0.0046, "num_tokens": 361556886.0, "reward": 1.0158275485038757, "reward_std": 0.10938665568828583, "rewards/accuracy_reward": 0.6794270873069763, "rewards/brier_reward": 0.8199212312698364, "rewards/confidence_uniqueness_reward": 0.940508759021759, "rewards/format_reward": 0.9916666626930237, "rewards/frontier_coverage_0": 0.03606350589543581, "rewards/frontier_coverage_1": 0.03606350589543581, "rewards/frontier_coverage_10": 0.03607259057462216, "rewards/frontier_coverage_15": 0.034527404233813286, "rewards/frontier_coverage_20": 0.03649830408394337, "rewards/frontier_coverage_25": 0.08104953020811081, "rewards/frontier_coverage_5": 0.03606350589543581, "signal/accuracy_reward/centered_abs_mean": 0.13355577141046523, "signal/accuracy_reward/group_std_mean": 0.17831997573375702, "signal/accuracy_reward/group_zero_std_frac": 0.48055556416511536, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06677788570523262, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06677788570523262, "signal/advantage_abs_mean": 0.07951956540346146, "signal/advantage_pre_scale_abs_mean": 0.07951956540346146, "signal/advantage_pre_scale_std": 0.14827975630760193, "signal/advantage_std": 0.14827975630760193, "signal/brier_reward/centered_abs_mean": 0.11730274558067322, "signal/brier_reward/group_std_mean": 0.15478793978691102, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011730275116860867, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011730275116860867, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03239181265234947, "signal/confidence_uniqueness_reward/group_std_mean": 0.04922289177775383, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003239181311801076, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003239181311801076, "signal/format_reward/centered_abs_mean": 0.01398654505610466, "signal/format_reward/group_std_mean": 0.026783711090683938, "signal/format_reward/group_zero_std_frac": 0.8861111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00699327252805233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00699327252805233, "signal/frontier_coverage_0/centered_abs_mean": 0.11483795940876007, "signal/frontier_coverage_0/group_std_mean": 0.1570892423391342, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016421828418970107, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016421828418970107, "signal/frontier_coverage_1/centered_abs_mean": 0.11483795940876007, "signal/frontier_coverage_1/group_std_mean": 0.1570892423391342, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016421828418970107, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016421828418970107, "signal/frontier_coverage_10/centered_abs_mean": 0.11440861821174622, "signal/frontier_coverage_10/group_std_mean": 0.15652381181716918, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016360432375222445, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016360432375222445, "signal/frontier_coverage_15/centered_abs_mean": 0.08434305042028427, "signal/frontier_coverage_15/group_std_mean": 0.11699189096689225, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001206105574965477, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001206105574965477, "signal/frontier_coverage_20/centered_abs_mean": 0.05325452834367752, "signal/frontier_coverage_20/group_std_mean": 0.07273693531751632, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007615397684276104, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007615397684276104, "signal/frontier_coverage_25/centered_abs_mean": 0.06348953396081924, "signal/frontier_coverage_25/group_std_mean": 0.08162465393543243, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009079003590159118, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009079003590159118, "signal/frontier_coverage_5/centered_abs_mean": 0.11483795940876007, "signal/frontier_coverage_5/group_std_mean": 0.1570892423391342, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016421828418970107, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016421828418970107, "step": 165 }, { "calibration/aurc": 0.11345005897139374, "calibration/batch_distribution_entropy": 0.706185091262855, "calibration/buffer_distribution_entropy": 0.8465500743068246, "calibration/confidence_entropy": 0.36535344265534453, "calibration/coverage@0%": 0.04657404353562005, "calibration/coverage@1%": 0.04657404353562005, "calibration/coverage@10%": 0.5926623947732128, "calibration/coverage@15%": 0.6906030995658305, "calibration/coverage@20%": 0.8280044496574324, "calibration/coverage@25%": 0.9148695822162646, "calibration/coverage@30%": 0.9685182082216265, "calibration/coverage@5%": 0.3362439533861038, "calibration/ece": 0.07788768102113267, "calibration/mean_confidence": 0.7964747611011322, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008072916666666674, "completions/max_length": 3205.2, "completions/max_terminated_length": 3205.2, "completions/mean_length": 751.2134521484375, "completions/mean_terminated_length": 757.3519409179687, "completions/min_length": 0.0, "completions/min_terminated_length": 216.4, "epoch": 0.4079949000637492, "grad_norm": 0.00043807283509522676, "learning_rate": 1.1445783132530121e-06, "loss": -0.0052, "num_tokens": 373300049.0, "reward": 1.0416455030441285, "reward_std": 0.10653368085622787, "rewards/accuracy_reward": 0.7301215171813965, "rewards/brier_reward": 0.8392017483711243, "rewards/confidence_uniqueness_reward": 0.9308984518051148, "rewards/format_reward": 0.9919270873069763, "rewards/frontier_coverage_0": 0.020954896369948982, "rewards/frontier_coverage_1": 0.020954896369948982, "rewards/frontier_coverage_10": 0.02102845092304051, "rewards/frontier_coverage_15": 0.02324553709477186, "rewards/frontier_coverage_20": 0.03625187166035175, "rewards/frontier_coverage_25": 0.10913793593645096, "rewards/frontier_coverage_5": 0.020954896369948982, "signal/accuracy_reward/centered_abs_mean": 0.13088650107383729, "signal/accuracy_reward/group_std_mean": 0.17503231167793273, "signal/accuracy_reward/group_zero_std_frac": 0.4972222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06544325053691864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06544325053691864, "signal/advantage_abs_mean": 0.07733463197946548, "signal/advantage_pre_scale_abs_mean": 0.07733463197946548, "signal/advantage_pre_scale_std": 0.1462598502635956, "signal/advantage_std": 0.1462598502635956, "signal/brier_reward/centered_abs_mean": 0.11199976354837418, "signal/brier_reward/group_std_mean": 0.14869881868362428, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011199977062642575, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011199977062642575, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03646374717354774, "signal/confidence_uniqueness_reward/group_std_mean": 0.053655432909727095, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003646374773234129, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003646374773234129, "signal/format_reward/centered_abs_mean": 0.012668185867369175, "signal/format_reward/group_std_mean": 0.024041558802127837, "signal/format_reward/group_zero_std_frac": 0.9000000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0063340929336845875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0063340929336845875, "signal/frontier_coverage_0/centered_abs_mean": 0.0988058403134346, "signal/frontier_coverage_0/group_std_mean": 0.13648533821105957, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014129235176369547, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014129235176369547, "signal/frontier_coverage_1/centered_abs_mean": 0.0988058403134346, "signal/frontier_coverage_1/group_std_mean": 0.13648533821105957, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014129235176369547, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014129235176369547, "signal/frontier_coverage_10/centered_abs_mean": 0.09827619642019272, "signal/frontier_coverage_10/group_std_mean": 0.13578663468360902, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014053495600819589, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014053495600819589, "signal/frontier_coverage_15/centered_abs_mean": 0.07011277079582215, "signal/frontier_coverage_15/group_std_mean": 0.09825922846794129, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010026126867160202, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010026126867160202, "signal/frontier_coverage_20/centered_abs_mean": 0.04608847498893738, "signal/frontier_coverage_20/group_std_mean": 0.06254973709583282, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006590651930309833, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006590651930309833, "signal/frontier_coverage_25/centered_abs_mean": 0.0696468323469162, "signal/frontier_coverage_25/group_std_mean": 0.08956842720508576, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009959497139789164, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009959497139789164, "signal/frontier_coverage_5/centered_abs_mean": 0.0988058403134346, "signal/frontier_coverage_5/group_std_mean": 0.13648533821105957, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014129235176369547, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014129235176369547, "step": 170 }, { "calibration/aurc": 0.10976473305508475, "calibration/batch_distribution_entropy": 0.7718932038406594, "calibration/buffer_distribution_entropy": 0.8409602787241589, "calibration/confidence_entropy": 0.38561692641318424, "calibration/coverage@0%": 0.05462301483420593, "calibration/coverage@1%": 0.05462301483420593, "calibration/coverage@10%": 0.5865572334220591, "calibration/coverage@15%": 0.7722409170414061, "calibration/coverage@20%": 0.8814547125793206, "calibration/coverage@25%": 0.9438745800671893, "calibration/coverage@30%": 0.9824972004479283, "calibration/coverage@5%": 0.17130578097731236, "calibration/ece": 0.07796374556939645, "calibration/mean_confidence": 0.7677826396168446, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009809027777777767, "completions/max_length": 3857.4, "completions/max_terminated_length": 3857.4, "completions/mean_length": 793.6175415039063, "completions/mean_terminated_length": 801.4669189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 211.4, "epoch": 0.4199947500656242, "grad_norm": 0.0005034080822952092, "learning_rate": 9.93975903614458e-07, "loss": -0.0094, "num_tokens": 385550491.0, "reward": 1.0352750539779663, "reward_std": 0.11443682610988617, "rewards/accuracy_reward": 0.7231770873069763, "rewards/brier_reward": 0.8273264408111572, "rewards/confidence_uniqueness_reward": 0.92387535572052, "rewards/format_reward": 0.9900173544883728, "rewards/frontier_coverage_0": 0.01753472238779068, "rewards/frontier_coverage_1": 0.01753472238779068, "rewards/frontier_coverage_10": 0.017634299769997596, "rewards/frontier_coverage_15": 0.01905247466638684, "rewards/frontier_coverage_20": 0.037164825946092606, "rewards/frontier_coverage_25": 0.12233072817325592, "rewards/frontier_coverage_5": 0.01753472238779068, "signal/accuracy_reward/centered_abs_mean": 0.13874240219593048, "signal/accuracy_reward/group_std_mean": 0.18652132749557496, "signal/accuracy_reward/group_zero_std_frac": 0.45833333730697634, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06937120109796524, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06937120109796524, "signal/advantage_abs_mean": 0.0822924718260765, "signal/advantage_pre_scale_abs_mean": 0.0822924718260765, "signal/advantage_pre_scale_std": 0.15624974370002748, "signal/advantage_std": 0.15624974370002748, "signal/brier_reward/centered_abs_mean": 0.11687278598546982, "signal/brier_reward/group_std_mean": 0.1550233006477356, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011687278375029564, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011687278375029564, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04072035998106003, "signal/confidence_uniqueness_reward/group_std_mean": 0.05919913575053215, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004072036035358906, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004072036035358906, "signal/format_reward/centered_abs_mean": 0.01610785573720932, "signal/format_reward/group_std_mean": 0.02907128185033798, "signal/format_reward/group_zero_std_frac": 0.8861111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00805392786860466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00805392786860466, "signal/frontier_coverage_0/centered_abs_mean": 0.09476696848869323, "signal/frontier_coverage_0/group_std_mean": 0.13635447770357131, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013551676413044334, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013551676413044334, "signal/frontier_coverage_1/centered_abs_mean": 0.09476696848869323, "signal/frontier_coverage_1/group_std_mean": 0.13635447770357131, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013551676413044334, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013551676413044334, "signal/frontier_coverage_10/centered_abs_mean": 0.0943188950419426, "signal/frontier_coverage_10/group_std_mean": 0.13574583530426027, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013487601187080144, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013487601187080144, "signal/frontier_coverage_15/centered_abs_mean": 0.06526126489043235, "signal/frontier_coverage_15/group_std_mean": 0.09589355587959289, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009332361165434122, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009332361165434122, "signal/frontier_coverage_20/centered_abs_mean": 0.04581375271081924, "signal/frontier_coverage_20/group_std_mean": 0.06430187001824379, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006551366415806115, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006551366415806115, "signal/frontier_coverage_25/centered_abs_mean": 0.07468874454498291, "signal/frontier_coverage_25/group_std_mean": 0.09664878100156785, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010680490406230092, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010680490406230092, "signal/frontier_coverage_5/centered_abs_mean": 0.09476696848869323, "signal/frontier_coverage_5/group_std_mean": 0.13635447770357131, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013551676413044334, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013551676413044334, "step": 175 }, { "calibration/aurc": 0.08759878898293974, "calibration/batch_distribution_entropy": 0.7113265211208019, "calibration/buffer_distribution_entropy": 0.8354377217127791, "calibration/confidence_entropy": 0.35385404928297615, "calibration/coverage@0%": 0.04326398224140371, "calibration/coverage@1%": 0.04326398224140371, "calibration/coverage@10%": 0.695155511232948, "calibration/coverage@15%": 0.8354279855474227, "calibration/coverage@20%": 0.941970765886207, "calibration/coverage@25%": 0.98125, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.38192302935652567, "calibration/ece": 0.06531083905713946, "calibration/mean_confidence": 0.8009476712326775, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013194444444444443, "completions/max_length": 3881.0, "completions/max_terminated_length": 3881.0, "completions/mean_length": 757.8158935546875, "completions/mean_terminated_length": 767.9434936523437, "completions/min_length": 0.0, "completions/min_terminated_length": 240.0, "epoch": 0.4319946000674992, "grad_norm": 0.00047206279123201966, "learning_rate": 8.433734939759036e-07, "loss": -0.01, "num_tokens": 397380498.0, "reward": 1.0324453830718994, "reward_std": 0.12025202363729477, "rewards/accuracy_reward": 0.7246527671813965, "rewards/brier_reward": 0.8183708786964417, "rewards/confidence_uniqueness_reward": 0.9126166462898254, "rewards/format_reward": 0.98671875, "rewards/frontier_coverage_0": 0.014990578033030033, "rewards/frontier_coverage_1": 0.014990578033030033, "rewards/frontier_coverage_10": 0.014904208388179541, "rewards/frontier_coverage_15": 0.017171311657875776, "rewards/frontier_coverage_20": 0.040305860340595245, "rewards/frontier_coverage_25": 0.13865080773830413, "rewards/frontier_coverage_5": 0.014990578033030033, "signal/accuracy_reward/centered_abs_mean": 0.1432291626930237, "signal/accuracy_reward/group_std_mean": 0.1884150594472885, "signal/accuracy_reward/group_zero_std_frac": 0.4666666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07161458134651184, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07161458134651184, "signal/advantage_abs_mean": 0.08963050693273544, "signal/advantage_pre_scale_abs_mean": 0.08963050693273544, "signal/advantage_pre_scale_std": 0.166022652387619, "signal/advantage_std": 0.166022652387619, "signal/brier_reward/centered_abs_mean": 0.12345067262649537, "signal/brier_reward/group_std_mean": 0.16434098184108734, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012345067597925663, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012345067597925663, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04739027544856071, "signal/confidence_uniqueness_reward/group_std_mean": 0.06653770804405212, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0047390274703502655, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0047390274703502655, "signal/format_reward/centered_abs_mean": 0.01977539025247097, "signal/format_reward/group_std_mean": 0.03279608637094498, "signal/format_reward/group_zero_std_frac": 0.8777777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009887695126235486, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009887695126235486, "signal/frontier_coverage_0/centered_abs_mean": 0.08489089906215667, "signal/frontier_coverage_0/group_std_mean": 0.12453063875436783, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012139398604631424, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012139398604631424, "signal/frontier_coverage_1/centered_abs_mean": 0.08489089906215667, "signal/frontier_coverage_1/group_std_mean": 0.12453063875436783, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012139398604631424, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012139398604631424, "signal/frontier_coverage_10/centered_abs_mean": 0.08434738963842392, "signal/frontier_coverage_10/group_std_mean": 0.123808091878891, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001206167647615075, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001206167647615075, "signal/frontier_coverage_15/centered_abs_mean": 0.05714336410164833, "signal/frontier_coverage_15/group_std_mean": 0.0846284121274948, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008171500754542649, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008171500754542649, "signal/frontier_coverage_20/centered_abs_mean": 0.045037756115198134, "signal/frontier_coverage_20/group_std_mean": 0.061644711345434186, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006440398865379393, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006440398865379393, "signal/frontier_coverage_25/centered_abs_mean": 0.08562668412923813, "signal/frontier_coverage_25/group_std_mean": 0.11055618077516556, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012244615936651826, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012244615936651826, "signal/frontier_coverage_5/centered_abs_mean": 0.08489089906215667, "signal/frontier_coverage_5/group_std_mean": 0.12453063875436783, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012139398604631424, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012139398604631424, "step": 180 }, { "calibration/aurc": 0.1682798620491915, "calibration/batch_distribution_entropy": 0.7261975431193642, "calibration/buffer_distribution_entropy": 0.8308463518220235, "calibration/confidence_entropy": 0.376597343419283, "calibration/coverage@0%": 0.021973124820581523, "calibration/coverage@1%": 0.021973124820581523, "calibration/coverage@10%": 0.12589397393134938, "calibration/coverage@15%": 0.4402788932101213, "calibration/coverage@20%": 0.8254405435639459, "calibration/coverage@25%": 0.893771815008726, "calibration/coverage@30%": 0.9361256544502619, "calibration/coverage@5%": 0.05168102299290528, "calibration/ece": 0.11194314941012559, "calibration/mean_confidence": 0.7820696841453423, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012586805555555558, "completions/max_length": 3486.0, "completions/max_terminated_length": 3486.0, "completions/mean_length": 758.8857666015625, "completions/mean_terminated_length": 768.6445678710937, "completions/min_length": 0.0, "completions/min_terminated_length": 206.8, "epoch": 0.44399445006937416, "grad_norm": 0.00047838789760135114, "learning_rate": 6.927710843373495e-07, "loss": -0.0084, "num_tokens": 409212910.0, "reward": 1.0218183040618896, "reward_std": 0.11956067681312561, "rewards/accuracy_reward": 0.69921875, "rewards/brier_reward": 0.8167834639549255, "rewards/confidence_uniqueness_reward": 0.9220002293586731, "rewards/format_reward": 0.9873263835906982, "rewards/frontier_coverage_0": 0.027354469895362853, "rewards/frontier_coverage_1": 0.027354469895362853, "rewards/frontier_coverage_10": 0.027558755502104758, "rewards/frontier_coverage_15": 0.026396383717656135, "rewards/frontier_coverage_20": 0.04515022188425064, "rewards/frontier_coverage_25": 0.14521757364273072, "rewards/frontier_coverage_5": 0.027354469895362853, "signal/accuracy_reward/centered_abs_mean": 0.1435384124517441, "signal/accuracy_reward/group_std_mean": 0.19056273102760315, "signal/accuracy_reward/group_zero_std_frac": 0.45833333730697634, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07176920622587205, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07176920622587205, "signal/advantage_abs_mean": 0.08683380931615829, "signal/advantage_pre_scale_abs_mean": 0.08683380931615829, "signal/advantage_pre_scale_std": 0.1575675427913666, "signal/advantage_std": 0.1575675427913666, "signal/brier_reward/centered_abs_mean": 0.11930460929870605, "signal/brier_reward/group_std_mean": 0.15884668827056886, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011930461041629314, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011930461041629314, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.042117471992969516, "signal/confidence_uniqueness_reward/group_std_mean": 0.06263997331261635, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0042117472738027574, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0042117472738027574, "signal/format_reward/centered_abs_mean": 0.01792534738779068, "signal/format_reward/group_std_mean": 0.033342940360307695, "signal/format_reward/group_zero_std_frac": 0.8611111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00896267369389534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00896267369389534, "signal/frontier_coverage_0/centered_abs_mean": 0.0974617674946785, "signal/frontier_coverage_0/group_std_mean": 0.13657613545656205, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013937032548710705, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013937032548710705, "signal/frontier_coverage_1/centered_abs_mean": 0.0974617674946785, "signal/frontier_coverage_1/group_std_mean": 0.13657613545656205, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013937032548710705, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013937032548710705, "signal/frontier_coverage_10/centered_abs_mean": 0.09610613882541656, "signal/frontier_coverage_10/group_std_mean": 0.1348185271024704, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013743178220465778, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013743178220465778, "signal/frontier_coverage_15/centered_abs_mean": 0.06056812852621078, "signal/frontier_coverage_15/group_std_mean": 0.08634127974510193, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008661242201924324, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008661242201924324, "signal/frontier_coverage_20/centered_abs_mean": 0.04676060602068901, "signal/frontier_coverage_20/group_std_mean": 0.06279976889491082, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006686766748316586, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006686766748316586, "signal/frontier_coverage_25/centered_abs_mean": 0.08563613891601562, "signal/frontier_coverage_25/group_std_mean": 0.11192211657762527, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012245968217030167, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012245968217030167, "signal/frontier_coverage_5/centered_abs_mean": 0.0974617674946785, "signal/frontier_coverage_5/group_std_mean": 0.13657613545656205, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013937032548710705, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013937032548710705, "step": 185 }, { "calibration/aurc": 0.15483879036252451, "calibration/batch_distribution_entropy": 0.6950153465439828, "calibration/buffer_distribution_entropy": 0.826411909010264, "calibration/confidence_entropy": 0.3410826717900854, "calibration/coverage@0%": 0.02739505924884515, "calibration/coverage@1%": 0.02739505924884515, "calibration/coverage@10%": 0.45961036352681256, "calibration/coverage@15%": 0.5195476172337611, "calibration/coverage@20%": 0.5751837242279644, "calibration/coverage@25%": 0.7749469647519582, "calibration/coverage@30%": 0.9947835073977374, "calibration/coverage@5%": 0.240377862886191, "calibration/ece": 0.10119530147175385, "calibration/mean_confidence": 0.7977167211208361, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007725694444444442, "completions/max_length": 3442.2, "completions/max_terminated_length": 3442.2, "completions/mean_length": 757.0838623046875, "completions/mean_terminated_length": 762.9970825195312, "completions/min_length": 0.0, "completions/min_terminated_length": 208.2, "epoch": 0.45599430007124914, "grad_norm": 0.0005020072567276657, "learning_rate": 5.421686746987952e-07, "loss": -0.0064, "num_tokens": 421017460.0, "reward": 1.0469782829284668, "reward_std": 0.1171686366200447, "rewards/accuracy_reward": 0.7385416626930237, "rewards/brier_reward": 0.8404343128204346, "rewards/confidence_uniqueness_reward": 0.9256058096885681, "rewards/format_reward": 0.9921875, "rewards/frontier_coverage_0": 0.024775561434216796, "rewards/frontier_coverage_1": 0.024775561434216796, "rewards/frontier_coverage_10": 0.025043190643191337, "rewards/frontier_coverage_15": 0.027431031875312328, "rewards/frontier_coverage_20": 0.051411689072847364, "rewards/frontier_coverage_25": 0.17211123406887055, "rewards/frontier_coverage_5": 0.024775561434216796, "signal/accuracy_reward/centered_abs_mean": 0.14800347089767457, "signal/accuracy_reward/group_std_mean": 0.1954652965068817, "signal/accuracy_reward/group_zero_std_frac": 0.43333333134651186, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07400173544883729, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07400173544883729, "signal/advantage_abs_mean": 0.08494782447814941, "signal/advantage_pre_scale_abs_mean": 0.08494782447814941, "signal/advantage_pre_scale_std": 0.15611115396022796, "signal/advantage_std": 0.15611115396022796, "signal/brier_reward/centered_abs_mean": 0.1102172926068306, "signal/brier_reward/group_std_mean": 0.15126932561397552, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011021729186177253, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011021729186177253, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.038782857730984686, "signal/confidence_uniqueness_reward/group_std_mean": 0.0578602485358715, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038782859221100805, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038782859221100805, "signal/format_reward/centered_abs_mean": 0.013769531343132257, "signal/format_reward/group_std_mean": 0.027529601380228995, "signal/format_reward/group_zero_std_frac": 0.8805555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006884765671566128, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006884765671566128, "signal/frontier_coverage_0/centered_abs_mean": 0.09849800616502762, "signal/frontier_coverage_0/group_std_mean": 0.13932813704013824, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014085214817896486, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014085214817896486, "signal/frontier_coverage_1/centered_abs_mean": 0.09849800616502762, "signal/frontier_coverage_1/group_std_mean": 0.13932813704013824, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014085214817896486, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014085214817896486, "signal/frontier_coverage_10/centered_abs_mean": 0.09644435942173005, "signal/frontier_coverage_10/group_std_mean": 0.136637906730175, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013791543431580066, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013791543431580066, "signal/frontier_coverage_15/centered_abs_mean": 0.05930143967270851, "signal/frontier_coverage_15/group_std_mean": 0.08500065058469772, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008480105898343027, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008480105898343027, "signal/frontier_coverage_20/centered_abs_mean": 0.047777583450078966, "signal/frontier_coverage_20/group_std_mean": 0.06432797089219093, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006832194398157298, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006832194398157298, "signal/frontier_coverage_25/centered_abs_mean": 0.09025410264730453, "signal/frontier_coverage_25/group_std_mean": 0.11796820908784866, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012906335527077318, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012906335527077318, "signal/frontier_coverage_5/centered_abs_mean": 0.09849800616502762, "signal/frontier_coverage_5/group_std_mean": 0.13932813704013824, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014085214817896486, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014085214817896486, "step": 190 }, { "calibration/aurc": 0.13881246169842548, "calibration/batch_distribution_entropy": 0.8034007790943107, "calibration/buffer_distribution_entropy": 0.8229056014565203, "calibration/confidence_entropy": 0.3790029894934074, "calibration/coverage@0%": 0.04114583333333333, "calibration/coverage@1%": 0.04114583333333333, "calibration/coverage@10%": 0.4840796301081628, "calibration/coverage@15%": 0.6186890797116715, "calibration/coverage@20%": 0.7321911642296752, "calibration/coverage@25%": 0.8936800027017224, "calibration/coverage@30%": 0.9290811058850522, "calibration/coverage@5%": 0.22732308576455895, "calibration/ece": 0.11684353143808787, "calibration/mean_confidence": 0.7342745704577593, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01015625, "completions/max_length": 3622.6, "completions/max_terminated_length": 3622.6, "completions/mean_length": 783.8465454101563, "completions/mean_terminated_length": 792.0054931640625, "completions/min_length": 0.0, "completions/min_terminated_length": 201.6, "epoch": 0.46799415007312406, "grad_norm": 0.00047749123768880963, "learning_rate": 3.91566265060241e-07, "loss": -0.0094, "num_tokens": 433128236.0, "reward": 1.0204681873321533, "reward_std": 0.11507217586040497, "rewards/accuracy_reward": 0.69296875, "rewards/brier_reward": 0.8137424111366272, "rewards/confidence_uniqueness_reward": 0.9278370261192321, "rewards/format_reward": 0.989756953716278, "rewards/frontier_coverage_0": 0.027737023681402205, "rewards/frontier_coverage_1": 0.027737023681402205, "rewards/frontier_coverage_10": 0.02771041765809059, "rewards/frontier_coverage_15": 0.027653909847140314, "rewards/frontier_coverage_20": 0.048602229356765746, "rewards/frontier_coverage_25": 0.1587932139635086, "rewards/frontier_coverage_5": 0.027737023681402205, "signal/accuracy_reward/centered_abs_mean": 0.137255859375, "signal/accuracy_reward/group_std_mean": 0.18516563177108764, "signal/accuracy_reward/group_zero_std_frac": 0.4638888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0686279296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0686279296875, "signal/advantage_abs_mean": 0.08264462798833846, "signal/advantage_pre_scale_abs_mean": 0.08264462798833846, "signal/advantage_pre_scale_std": 0.15539104044437407, "signal/advantage_std": 0.15539104044437407, "signal/brier_reward/centered_abs_mean": 0.12252413183450699, "signal/brier_reward/group_std_mean": 0.16194479465484618, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012252412736415863, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012252412736415863, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03895300626754761, "signal/confidence_uniqueness_reward/group_std_mean": 0.05910627841949463, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003895300766453147, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003895300766453147, "signal/format_reward/centered_abs_mean": 0.017187500186264516, "signal/format_reward/group_std_mean": 0.032911072671413424, "signal/format_reward/group_zero_std_frac": 0.8611111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008593750093132258, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008593750093132258, "signal/frontier_coverage_0/centered_abs_mean": 0.099191452562809, "signal/frontier_coverage_0/group_std_mean": 0.14243146479129792, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001418437750544399, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001418437750544399, "signal/frontier_coverage_1/centered_abs_mean": 0.099191452562809, "signal/frontier_coverage_1/group_std_mean": 0.14243146479129792, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001418437750544399, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001418437750544399, "signal/frontier_coverage_10/centered_abs_mean": 0.09691323190927506, "signal/frontier_coverage_10/group_std_mean": 0.13935501873493195, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001385859283618629, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001385859283618629, "signal/frontier_coverage_15/centered_abs_mean": 0.059005143493413924, "signal/frontier_coverage_15/group_std_mean": 0.08615712970495223, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008437735494226217, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008437735494226217, "signal/frontier_coverage_20/centered_abs_mean": 0.049475245922803876, "signal/frontier_coverage_20/group_std_mean": 0.06738647893071174, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007074960158206522, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007074960158206522, "signal/frontier_coverage_25/centered_abs_mean": 0.09773661196231842, "signal/frontier_coverage_25/group_std_mean": 0.12605148553848267, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013976335991173982, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013976335991173982, "signal/frontier_coverage_5/centered_abs_mean": 0.099191452562809, "signal/frontier_coverage_5/group_std_mean": 0.14243146479129792, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001418437750544399, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001418437750544399, "step": 195 }, { "calibration/aurc": 0.17450118814431104, "calibration/batch_distribution_entropy": 0.7161232206831062, "calibration/buffer_distribution_entropy": 0.8213628377832405, "calibration/confidence_entropy": 0.3782438955136188, "calibration/coverage@0%": 0.01263157894736842, "calibration/coverage@1%": 0.01263157894736842, "calibration/coverage@10%": 0.33640648618936375, "calibration/coverage@15%": 0.3862209701800192, "calibration/coverage@20%": 0.8251629706609866, "calibration/coverage@25%": 0.894030134899913, "calibration/coverage@30%": 0.9378590078328981, "calibration/coverage@5%": 0.1907503091933489, "calibration/ece": 0.13517372072619013, "calibration/mean_confidence": 0.804132633586924, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006423611111111116, "completions/max_length": 3196.8, "completions/max_terminated_length": 3196.8, "completions/mean_length": 757.547216796875, "completions/mean_terminated_length": 762.4617919921875, "completions/min_length": 0.0, "completions/min_terminated_length": 243.2, "epoch": 0.47999400007499904, "grad_norm": 0.0004946914850734174, "learning_rate": 2.409638554216868e-07, "loss": -0.0048, "num_tokens": 444922988.0, "reward": 1.0290307998657227, "reward_std": 0.11202344298362732, "rewards/accuracy_reward": 0.7034722208976746, "rewards/brier_reward": 0.8208521723747253, "rewards/confidence_uniqueness_reward": 0.9324617624282837, "rewards/format_reward": 0.9935763835906982, "rewards/frontier_coverage_0": 0.02996818870306015, "rewards/frontier_coverage_1": 0.02996818870306015, "rewards/frontier_coverage_10": 0.03023492209613323, "rewards/frontier_coverage_15": 0.02954472191631794, "rewards/frontier_coverage_20": 0.04928178116679192, "rewards/frontier_coverage_25": 0.1629292458295822, "rewards/frontier_coverage_5": 0.02996818870306015, "signal/accuracy_reward/centered_abs_mean": 0.14172092080116272, "signal/accuracy_reward/group_std_mean": 0.18414589762687683, "signal/accuracy_reward/group_zero_std_frac": 0.4888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07086046040058136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07086046040058136, "signal/advantage_abs_mean": 0.08335170894861221, "signal/advantage_pre_scale_abs_mean": 0.08335170894861221, "signal/advantage_pre_scale_std": 0.15283463299274444, "signal/advantage_std": 0.15283463299274444, "signal/brier_reward/centered_abs_mean": 0.11691619008779526, "signal/brier_reward/group_std_mean": 0.15511786341667175, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011691619642078876, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011691619642078876, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.032414442673325536, "signal/confidence_uniqueness_reward/group_std_mean": 0.04753193408250809, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032414443790912627, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032414443790912627, "signal/format_reward/centered_abs_mean": 0.01107855886220932, "signal/format_reward/group_std_mean": 0.021301887929439545, "signal/format_reward/group_zero_std_frac": 0.9111111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00553927943110466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00553927943110466, "signal/frontier_coverage_0/centered_abs_mean": 0.09230706095695496, "signal/frontier_coverage_0/group_std_mean": 0.1337108790874481, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013199909590184689, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013199909590184689, "signal/frontier_coverage_1/centered_abs_mean": 0.09230706095695496, "signal/frontier_coverage_1/group_std_mean": 0.1337108790874481, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013199909590184689, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013199909590184689, "signal/frontier_coverage_10/centered_abs_mean": 0.08967625200748444, "signal/frontier_coverage_10/group_std_mean": 0.1302173465490341, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001282370393164456, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001282370393164456, "signal/frontier_coverage_15/centered_abs_mean": 0.05314598008990288, "signal/frontier_coverage_15/group_std_mean": 0.07881596982479096, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007599874981679023, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007599874981679023, "signal/frontier_coverage_20/centered_abs_mean": 0.04684214442968369, "signal/frontier_coverage_20/group_std_mean": 0.06328665986657142, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006698426441289485, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006698426441289485, "signal/frontier_coverage_25/centered_abs_mean": 0.10195220559835434, "signal/frontier_coverage_25/group_std_mean": 0.13177755773067473, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001457916502840817, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001457916502840817, "signal/frontier_coverage_5/centered_abs_mean": 0.09230706095695496, "signal/frontier_coverage_5/group_std_mean": 0.1337108790874481, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013199909590184689, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013199909590184689, "step": 200 }, { "epoch": 0.47999400007499904, "eval_calibration/aurc": 0.21105365972920764, "eval_calibration/batch_distribution_entropy": 0.7556374205806667, "eval_calibration/buffer_distribution_entropy": 0.8182456951169592, "eval_calibration/confidence_entropy": 0.38275497673899267, "eval_calibration/coverage@0%": 0.13020833333333334, "eval_calibration/coverage@1%": 0.13020833333333334, "eval_calibration/coverage@10%": 0.25, "eval_calibration/coverage@15%": 0.4620295698924732, "eval_calibration/coverage@20%": 0.6500336021505376, "eval_calibration/coverage@25%": 0.8175403225806451, "eval_calibration/coverage@30%": 0.9427083333333334, "eval_calibration/coverage@5%": 0.13020833333333334, "eval_calibration/ece": 0.18436342808133366, "eval_calibration/mean_confidence": 0.7474808675974627, "eval_completions/clipped_ratio": 0.010243055555555566, "eval_completions/max_length": 2500.6666666666665, "eval_completions/max_terminated_length": 2500.6666666666665, "eval_completions/mean_length": 760.373291015625, "eval_completions/mean_terminated_length": 768.1798095703125, "eval_completions/min_length": 31.0, "eval_completions/min_terminated_length": 264.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 444922988.0, "eval_reward": 1.0161888599395752, "eval_reward_std": 0.25519714256127674, "eval_rewards/accuracy_reward": 0.6892361144224802, "eval_rewards/brier_reward": 0.8225160837173462, "eval_rewards/confidence_uniqueness_reward": 0.8791253864765167, "eval_rewards/format_reward": 0.9913194477558136, "eval_rewards/frontier_coverage_0": 0.037744694078962006, "eval_rewards/frontier_coverage_1": 0.037744694078962006, "eval_rewards/frontier_coverage_10": 0.037526294899483524, "eval_rewards/frontier_coverage_15": 0.03331689815968275, "eval_rewards/frontier_coverage_20": 0.05279789244135221, "eval_rewards/frontier_coverage_25": 0.16500501583019891, "eval_rewards/frontier_coverage_5": 0.037744694078962006, "eval_runtime": 191.4272, "eval_samples_per_second": 5.224, "eval_signal/accuracy_reward/centered_abs_mean": 0.4150390625, "eval_signal/accuracy_reward/group_std_mean": 0.46090074876944226, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20751953125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20751953125, "eval_signal/advantage_abs_mean": 0.22260082264741263, "eval_signal/advantage_pre_scale_abs_mean": 0.22260082264741263, "eval_signal/advantage_pre_scale_std": 0.2543907364209493, "eval_signal/advantage_std": 0.2543907364209493, "eval_signal/brier_reward/centered_abs_mean": 0.20484093576669693, "eval_signal/brier_reward/group_std_mean": 0.2656843389074008, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02048409388711055, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02048409388711055, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.058576944594581924, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08974225322405498, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00585769466124475, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00585769466124475, "eval_signal/format_reward/centered_abs_mean": 0.016710069340964157, "eval_signal/format_reward/group_std_mean": 0.04611522828539213, "eval_signal/format_reward/group_zero_std_frac": 0.750000019868215, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.008355034670482079, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.008355034670482079, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.14778954287370047, "eval_signal/frontier_coverage_0/group_std_mean": 0.2527366851766904, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021133904034892717, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021133904034892717, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.14778954287370047, "eval_signal/frontier_coverage_1/group_std_mean": 0.2527366851766904, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021133904034892717, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021133904034892717, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.1412328581015269, "eval_signal/frontier_coverage_10/group_std_mean": 0.24346366773049036, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020196297749256096, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020196297749256096, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.07746745770176251, "eval_signal/frontier_coverage_15/group_std_mean": 0.14579874525467554, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011077846284024417, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011077846284024417, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.06913943216204643, "eval_signal/frontier_coverage_20/group_std_mean": 0.09641388555367787, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009886938593505572, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009886938593505572, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.20433313151200613, "eval_signal/frontier_coverage_25/group_std_mean": 0.24381026128927866, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0029219637314478555, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0029219637314478555, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.14778954287370047, "eval_signal/frontier_coverage_5/group_std_mean": 0.2527366851766904, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021133904034892717, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021133904034892717, "eval_steps_per_second": 0.031, "step": 200 }, { "calibration/aurc": 0.13666291697376665, "calibration/batch_distribution_entropy": 0.7343215574423284, "calibration/buffer_distribution_entropy": 0.8161242104108076, "calibration/confidence_entropy": 0.35527636219121406, "calibration/coverage@0%": 0.02404686068924036, "calibration/coverage@1%": 0.02404686068924036, "calibration/coverage@10%": 0.38688655743384415, "calibration/coverage@15%": 0.5568907728823345, "calibration/coverage@20%": 0.8736886372655908, "calibration/coverage@25%": 0.9124568852977146, "calibration/coverage@30%": 0.967469184153028, "calibration/coverage@5%": 0.18059922444215273, "calibration/ece": 0.10120437717746822, "calibration/mean_confidence": 0.774974210661998, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006684027777777768, "completions/max_length": 3385.2, "completions/max_terminated_length": 3385.2, "completions/mean_length": 771.1322143554687, "completions/mean_terminated_length": 776.322998046875, "completions/min_length": 0.0, "completions/min_terminated_length": 204.0, "epoch": 0.491993850076874, "grad_norm": 0.000427232647780329, "learning_rate": 9.036144578313253e-08, "loss": -0.0044, "num_tokens": 456872383.0, "reward": 1.061851143836975, "reward_std": 0.10766315758228302, "rewards/accuracy_reward": 0.7657118082046509, "rewards/brier_reward": 0.8460538625717163, "rewards/confidence_uniqueness_reward": 0.9307031512260437, "rewards/format_reward": 0.9933159708976745, "rewards/frontier_coverage_0": 0.010852430667728185, "rewards/frontier_coverage_1": 0.010852430667728185, "rewards/frontier_coverage_10": 0.011620633210986853, "rewards/frontier_coverage_15": 0.022410315554589035, "rewards/frontier_coverage_20": 0.0554275631904602, "rewards/frontier_coverage_25": 0.20396708250045775, "rewards/frontier_coverage_5": 0.010852430667728185, "signal/accuracy_reward/centered_abs_mean": 0.13183051347732544, "signal/accuracy_reward/group_std_mean": 0.1805938422679901, "signal/accuracy_reward/group_zero_std_frac": 0.4638888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06591525673866272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06591525673866272, "signal/advantage_abs_mean": 0.07528244256973267, "signal/advantage_pre_scale_abs_mean": 0.07528244256973267, "signal/advantage_pre_scale_std": 0.14561330080032348, "signal/advantage_std": 0.14561330080032348, "signal/brier_reward/centered_abs_mean": 0.10941742211580277, "signal/brier_reward/group_std_mean": 0.14824758172035218, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010941742919385433, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010941742919385433, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03412656709551811, "signal/confidence_uniqueness_reward/group_std_mean": 0.052902082353830336, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034126567654311655, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034126567654311655, "signal/format_reward/centered_abs_mean": 0.011886935960501433, "signal/format_reward/group_std_mean": 0.02607992962002754, "signal/format_reward/group_zero_std_frac": 0.8777777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0059434679802507166, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0059434679802507166, "signal/frontier_coverage_0/centered_abs_mean": 0.10363202840089798, "signal/frontier_coverage_0/group_std_mean": 0.144540935754776, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014819379895925522, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014819379895925522, "signal/frontier_coverage_1/centered_abs_mean": 0.10363202840089798, "signal/frontier_coverage_1/group_std_mean": 0.144540935754776, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014819379895925522, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014819379895925522, "signal/frontier_coverage_10/centered_abs_mean": 0.09921518713235855, "signal/frontier_coverage_10/group_std_mean": 0.13871577978134156, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014187771128490567, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014187771128490567, "signal/frontier_coverage_15/centered_abs_mean": 0.06100954413414002, "signal/frontier_coverage_15/group_std_mean": 0.08565070480108261, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008724364801310003, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008724364801310003, "signal/frontier_coverage_20/centered_abs_mean": 0.05148982182145119, "signal/frontier_coverage_20/group_std_mean": 0.06858698725700378, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007363044773228466, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007363044773228466, "signal/frontier_coverage_25/centered_abs_mean": 0.09679168611764907, "signal/frontier_coverage_25/group_std_mean": 0.12770785093307496, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013841211097314955, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013841211097314955, "signal/frontier_coverage_5/centered_abs_mean": 0.10363202840089798, "signal/frontier_coverage_5/group_std_mean": 0.144540935754776, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014819379895925522, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014819379895925522, "step": 205 }, { "calibration/aurc": 0.08977735855554447, "calibration/batch_distribution_entropy": 0.7258205856417015, "calibration/buffer_distribution_entropy": 0.8129820730847942, "calibration/confidence_entropy": 0.3772828251089602, "calibration/coverage@0%": 0.014834205933682372, "calibration/coverage@1%": 0.014834205933682372, "calibration/coverage@10%": 0.6369944219839089, "calibration/coverage@15%": 0.8318157401648296, "calibration/coverage@20%": 0.9493891797556718, "calibration/coverage@25%": 0.9912739965095986, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.38716781238181164, "calibration/ece": 0.0650420626430733, "calibration/mean_confidence": 0.801219024708996, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005208333333333333, "completions/max_length": 3513.0, "completions/max_terminated_length": 3513.0, "completions/mean_length": 768.9353434244791, "completions/mean_terminated_length": 773.00634765625, "completions/min_length": 0.0, "completions/min_terminated_length": 202.33333333333334, "epoch": 0.49919376007799904, "num_tokens": 464053312.0, "reward": 1.0329957803090413, "reward_std": 0.11097157249848048, "rewards/accuracy_reward": 0.7083333333333334, "rewards/brier_reward": 0.8263349533081055, "rewards/confidence_uniqueness_reward": 0.9369557102521261, "rewards/format_reward": 0.9947916666666666, "rewards/frontier_coverage_0": 0.024060875177383423, "rewards/frontier_coverage_1": 0.024060875177383423, "rewards/frontier_coverage_10": 0.023912989844878513, "rewards/frontier_coverage_15": 0.02603423222899437, "rewards/frontier_coverage_20": 0.053175790856281914, "rewards/frontier_coverage_25": 0.18163357178370157, "rewards/frontier_coverage_5": 0.024060875177383423, "signal/accuracy_reward/centered_abs_mean": 0.14668330550193787, "signal/accuracy_reward/group_std_mean": 0.19076116383075714, "signal/accuracy_reward/group_zero_std_frac": 0.4722222189108531, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07334165275096893, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07334165275096893, "signal/advantage_abs_mean": 0.08218363424142201, "signal/advantage_pre_scale_abs_mean": 0.08218363424142201, "signal/advantage_pre_scale_std": 0.15012098848819733, "signal/advantage_std": 0.15012098848819733, "signal/brier_reward/centered_abs_mean": 0.11896256854136784, "signal/brier_reward/group_std_mean": 0.15338205297787985, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011896257288753986, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011896257288753986, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030714243029554684, "signal/confidence_uniqueness_reward/group_std_mean": 0.04745869214336077, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030714243184775114, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030714243184775114, "signal/format_reward/centered_abs_mean": 0.009801794153948626, "signal/format_reward/group_std_mean": 0.022312050685286522, "signal/format_reward/group_zero_std_frac": 0.8981481591860453, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004900897076974313, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004900897076974313, "signal/frontier_coverage_0/centered_abs_mean": 0.11180164168278377, "signal/frontier_coverage_0/group_std_mean": 0.15422282616297403, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001598763473642369, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001598763473642369, "signal/frontier_coverage_1/centered_abs_mean": 0.11180164168278377, "signal/frontier_coverage_1/group_std_mean": 0.15422282616297403, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001598763473642369, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001598763473642369, "signal/frontier_coverage_10/centered_abs_mean": 0.10676760226488113, "signal/frontier_coverage_10/group_std_mean": 0.14766866465409598, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015267768564323585, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015267768564323585, "signal/frontier_coverage_15/centered_abs_mean": 0.06299562752246857, "signal/frontier_coverage_15/group_std_mean": 0.08778965721527736, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009008374957678219, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009008374957678219, "signal/frontier_coverage_20/centered_abs_mean": 0.05205661058425903, "signal/frontier_coverage_20/group_std_mean": 0.06810636073350906, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007444095293370386, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007444095293370386, "signal/frontier_coverage_25/centered_abs_mean": 0.10553650557994843, "signal/frontier_coverage_25/group_std_mean": 0.13382530957460403, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015091719493890803, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015091719493890803, "signal/frontier_coverage_5/centered_abs_mean": 0.11180164168278377, "signal/frontier_coverage_5/group_std_mean": 0.15422282616297403, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001598763473642369, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001598763473642369, "step": 208, "total_flos": 0.0, "train_loss": -0.006107796076461314, "train_runtime": 40819.9823, "train_samples_per_second": 0.367, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 208, "num_input_tokens_seen": 464053312, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }