{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.49919376007799904, "eval_steps": 50, "global_step": 208, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.514086638541351, "calibration/batch_distribution_entropy": 0.27599249583875307, "calibration/confidence_entropy": 0.2228992812774721, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4635714419377363, "calibration/mean_confidence": 0.9145515300719154, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0203125, "completions/max_length": 4017.2, "completions/max_terminated_length": 4017.2, "completions/mean_length": 517.5268188476563, "completions/mean_terminated_length": 528.2714599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011999850001874977, "grad_norm": 0.004794155713170767, "learning_rate": 5.952380952380953e-07, "loss": 0.008, "num_tokens": 9076117.0, "reward": 0.5741718530654907, "reward_std": 0.5221742153167724, "rewards/accuracy_reward": 0.2621527761220932, "rewards/brier_reward": 0.31355856657028197, "rewards/confidence_uniqueness_reward": 0.288547545671463, "rewards/format_reward": 0.5970486044883728, "rewards/frontier_aurc_reward": 0.27689927220344546, "rewards/frontier_coverage_1": 0.27689927220344546, "rewards/frontier_coverage_10": 0.27689927220344546, "rewards/frontier_coverage_15": 0.27689927220344546, "rewards/frontier_coverage_20": 0.27689927220344546, "rewards/frontier_coverage_25": 0.27689927220344546, "rewards/frontier_coverage_5": 0.27689927220344546, "rewards/frontier_ece_reward": 0.27689927220344546, "signal/accuracy_reward/centered_abs_mean": 0.3104600667953491, "signal/accuracy_reward/group_std_mean": 0.3717172920703888, "signal/accuracy_reward/group_zero_std_frac": 0.07777777910232545, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15523003339767455, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15523003339767455, "signal/advantage_abs_mean": 0.4487810075283051, "signal/advantage_pre_scale_abs_mean": 0.4487810075283051, "signal/advantage_pre_scale_std": 0.5276062607765197, "signal/advantage_std": 0.5276062607765197, "signal/brier_reward/centered_abs_mean": 0.32066049575805666, "signal/brier_reward/group_std_mean": 0.3748934090137482, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04008256196975708, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.04008256196975708, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2354918897151947, "signal/confidence_uniqueness_reward/group_std_mean": 0.2874836504459381, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029436486214399336, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.029436486214399336, "signal/format_reward/centered_abs_mean": 0.4400065064430237, "signal/format_reward/group_std_mean": 0.47453489899635315, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.22000325322151185, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.22000325322151185, "signal/frontier_aurc_reward/centered_abs_mean": 0.31218199133872987, "signal/frontier_aurc_reward/group_std_mean": 0.3707219660282135, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005588056985288858, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005588056985288858, "signal/frontier_coverage_1/centered_abs_mean": 0.31218199133872987, "signal/frontier_coverage_1/group_std_mean": 0.3707219660282135, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005588056985288858, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005588056985288858, "signal/frontier_coverage_10/centered_abs_mean": 0.31218199133872987, "signal/frontier_coverage_10/group_std_mean": 0.3707219660282135, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005588056985288858, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005588056985288858, "signal/frontier_coverage_15/centered_abs_mean": 0.31218199133872987, "signal/frontier_coverage_15/group_std_mean": 0.3707219660282135, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005588056985288858, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005588056985288858, "signal/frontier_coverage_20/centered_abs_mean": 0.31218199133872987, "signal/frontier_coverage_20/group_std_mean": 0.3707219660282135, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005588056985288858, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005588056985288858, "signal/frontier_coverage_25/centered_abs_mean": 0.31218199133872987, "signal/frontier_coverage_25/group_std_mean": 0.3707219660282135, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005588056985288858, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005588056985288858, "signal/frontier_coverage_5/centered_abs_mean": 0.31218199133872987, "signal/frontier_coverage_5/group_std_mean": 0.3707219660282135, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005588056985288858, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005588056985288858, "signal/frontier_ece_reward/centered_abs_mean": 0.31218199133872987, "signal/frontier_ece_reward/group_std_mean": 0.3707219660282135, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.039022748917341234, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.039022748917341234, "step": 5 }, { "calibration/aurc": 0.5280547395080012, "calibration/batch_distribution_entropy": 0.25912112616058935, "calibration/confidence_entropy": 0.22271742129617608, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.48657832278132584, "calibration/mean_confidence": 0.921991283906116, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017881944444444443, "completions/max_length": 4016.4, "completions/max_terminated_length": 4016.4, "completions/mean_length": 476.5828063964844, "completions/mean_terminated_length": 485.360302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 7.4, "epoch": 0.023999700003749954, "grad_norm": 0.024597520008683205, "learning_rate": 1.1904761904761906e-06, "loss": 0.0019, "num_tokens": 17649071.0, "reward": 0.667762839794159, "reward_std": 0.48139882683753965, "rewards/accuracy_reward": 0.2855034708976746, "rewards/brier_reward": 0.35117203593254087, "rewards/confidence_uniqueness_reward": 0.3601065635681152, "rewards/format_reward": 0.7209201455116272, "rewards/frontier_aurc_reward": 0.30220218896865847, "rewards/frontier_coverage_1": 0.30220218896865847, "rewards/frontier_coverage_10": 0.30220218896865847, "rewards/frontier_coverage_15": 0.30220218896865847, "rewards/frontier_coverage_20": 0.30220218896865847, "rewards/frontier_coverage_25": 0.30220218896865847, "rewards/frontier_coverage_5": 0.30220218896865847, "rewards/frontier_ece_reward": 0.30220218896865847, "signal/accuracy_reward/centered_abs_mean": 0.3181260824203491, "signal/accuracy_reward/group_std_mean": 0.37762491106987, "signal/accuracy_reward/group_zero_std_frac": 0.0833333358168602, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15906304121017456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15906304121017456, "signal/advantage_abs_mean": 0.3990109622478485, "signal/advantage_pre_scale_abs_mean": 0.3990109622478485, "signal/advantage_pre_scale_std": 0.4871573269367218, "signal/advantage_std": 0.4871573269367218, "signal/brier_reward/centered_abs_mean": 0.3128700017929077, "signal/brier_reward/group_std_mean": 0.3669971525669098, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03910875022411346, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03910875022411346, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2221683979034424, "signal/confidence_uniqueness_reward/group_std_mean": 0.27797139883041383, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0277710497379303, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0277710497379303, "signal/format_reward/centered_abs_mean": 0.34768337607383726, "signal/format_reward/group_std_mean": 0.41295074224472045, "signal/format_reward/group_zero_std_frac": 0.01388888917863369, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.17384168803691863, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.17384168803691863, "signal/frontier_aurc_reward/centered_abs_mean": 0.31191812753677367, "signal/frontier_aurc_reward/group_std_mean": 0.36915679574012755, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0055833343416452404, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0055833343416452404, "signal/frontier_coverage_1/centered_abs_mean": 0.31191812753677367, "signal/frontier_coverage_1/group_std_mean": 0.36915679574012755, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0055833343416452404, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0055833343416452404, "signal/frontier_coverage_10/centered_abs_mean": 0.31191812753677367, "signal/frontier_coverage_10/group_std_mean": 0.36915679574012755, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0055833343416452404, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0055833343416452404, "signal/frontier_coverage_15/centered_abs_mean": 0.31191812753677367, "signal/frontier_coverage_15/group_std_mean": 0.36915679574012755, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0055833343416452404, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0055833343416452404, "signal/frontier_coverage_20/centered_abs_mean": 0.31191812753677367, "signal/frontier_coverage_20/group_std_mean": 0.36915679574012755, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0055833343416452404, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0055833343416452404, "signal/frontier_coverage_25/centered_abs_mean": 0.31191812753677367, "signal/frontier_coverage_25/group_std_mean": 0.36915679574012755, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0055833343416452404, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0055833343416452404, "signal/frontier_coverage_5/centered_abs_mean": 0.31191812753677367, "signal/frontier_coverage_5/group_std_mean": 0.36915679574012755, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0055833343416452404, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0055833343416452404, "signal/frontier_ece_reward/centered_abs_mean": 0.31191812753677367, "signal/frontier_ece_reward/group_std_mean": 0.36915679574012755, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03898976594209671, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03898976594209671, "step": 10 }, { "calibration/aurc": 0.567845924200485, "calibration/batch_distribution_entropy": 0.3045943413639345, "calibration/confidence_entropy": 0.24763369243483666, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5280282173660021, "calibration/mean_confidence": 0.9096602757540054, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009895833333333326, "completions/max_length": 3921.8, "completions/max_terminated_length": 3921.8, "completions/mean_length": 413.9700561523438, "completions/mean_terminated_length": 418.13662109375, "completions/min_length": 0.0, "completions/min_terminated_length": 58.4, "epoch": 0.03599955000562493, "grad_norm": 0.0017141081625595689, "learning_rate": 1.7857142857142859e-06, "loss": -0.0112, "num_tokens": 25519990.0, "reward": 0.8229876399040222, "reward_std": 0.37621534466743467, "rewards/accuracy_reward": 0.3091145813465118, "rewards/brier_reward": 0.4167811870574951, "rewards/confidence_uniqueness_reward": 0.5070087611675262, "rewards/format_reward": 0.9366319298744201, "rewards/frontier_aurc_reward": 0.338156646490097, "rewards/frontier_coverage_1": 0.338156646490097, "rewards/frontier_coverage_10": 0.338156646490097, "rewards/frontier_coverage_15": 0.338156646490097, "rewards/frontier_coverage_20": 0.338156646490097, "rewards/frontier_coverage_25": 0.338156646490097, "rewards/frontier_coverage_5": 0.338156646490097, "rewards/frontier_ece_reward": 0.338156646490097, "signal/accuracy_reward/centered_abs_mean": 0.32050238847732543, "signal/accuracy_reward/group_std_mean": 0.37917629480361936, "signal/accuracy_reward/group_zero_std_frac": 0.07777778059244156, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16025119423866271, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.16025119423866271, "signal/advantage_abs_mean": 0.30312079191207886, "signal/advantage_pre_scale_abs_mean": 0.30312079191207886, "signal/advantage_pre_scale_std": 0.3861180067062378, "signal/advantage_std": 0.3861180067062378, "signal/brier_reward/centered_abs_mean": 0.29925207495689393, "signal/brier_reward/group_std_mean": 0.35090850591659545, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03740650936961174, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03740650936961174, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.18491619527339936, "signal/confidence_uniqueness_reward/group_std_mean": 0.23462865352630616, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02311452440917492, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02311452440917492, "signal/format_reward/centered_abs_mean": 0.10967882126569747, "signal/format_reward/group_std_mean": 0.19272871911525727, "signal/format_reward/group_zero_std_frac": 0.2833333410322666, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05483941063284874, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.05483941063284874, "signal/frontier_aurc_reward/centered_abs_mean": 0.3117563307285309, "signal/frontier_aurc_reward/group_std_mean": 0.36737927198410036, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005580438114702701, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005580438114702701, "signal/frontier_coverage_1/centered_abs_mean": 0.3117563307285309, "signal/frontier_coverage_1/group_std_mean": 0.36737927198410036, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005580438114702701, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005580438114702701, "signal/frontier_coverage_10/centered_abs_mean": 0.3117563307285309, "signal/frontier_coverage_10/group_std_mean": 0.36737927198410036, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005580438114702701, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005580438114702701, "signal/frontier_coverage_15/centered_abs_mean": 0.3117563307285309, "signal/frontier_coverage_15/group_std_mean": 0.36737927198410036, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005580438114702701, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005580438114702701, "signal/frontier_coverage_20/centered_abs_mean": 0.3117563307285309, "signal/frontier_coverage_20/group_std_mean": 0.36737927198410036, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005580438114702701, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005580438114702701, "signal/frontier_coverage_25/centered_abs_mean": 0.3117563307285309, "signal/frontier_coverage_25/group_std_mean": 0.36737927198410036, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005580438114702701, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005580438114702701, "signal/frontier_coverage_5/centered_abs_mean": 0.3117563307285309, "signal/frontier_coverage_5/group_std_mean": 0.36737927198410036, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005580438114702701, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005580438114702701, "signal/frontier_ece_reward/centered_abs_mean": 0.3117563307285309, "signal/frontier_ece_reward/group_std_mean": 0.36737927198410036, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03896954134106636, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03896954134106636, "step": 15 }, { "calibration/aurc": 0.47143046891943, "calibration/batch_distribution_entropy": 0.4095787632848159, "calibration/buffer_distribution_entropy": 0.3143981234334837, "calibration/confidence_entropy": 0.31617773106851604, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.39434792860496753, "calibration/mean_confidence": 0.8800821491423632, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009375, "completions/max_length": 4021.8, "completions/max_terminated_length": 4021.8, "completions/mean_length": 432.70338134765626, "completions/mean_terminated_length": 436.8342529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.04799940000749991, "grad_norm": 0.001050017075613141, "learning_rate": 2.380952380952381e-06, "loss": -0.0105, "num_tokens": 33618429.0, "reward": 0.8896676540374756, "reward_std": 0.28481470942497256, "rewards/accuracy_reward": 0.4251736104488373, "rewards/brier_reward": 0.5469748020172119, "rewards/confidence_uniqueness_reward": 0.5929476499557496, "rewards/format_reward": 0.985937488079071, "rewards/frontier_aurc_reward": 0.16746631124988198, "rewards/frontier_coverage_1": 0.1779847363010049, "rewards/frontier_coverage_10": 0.1779847363010049, "rewards/frontier_coverage_15": 0.1779847363010049, "rewards/frontier_coverage_20": 0.1779847363010049, "rewards/frontier_coverage_25": 0.1779847363010049, "rewards/frontier_coverage_5": 0.1779847363010049, "rewards/frontier_ece_reward": 0.15606855656951665, "signal/accuracy_reward/centered_abs_mean": 0.2997070372104645, "signal/accuracy_reward/group_std_mean": 0.3657579779624939, "signal/accuracy_reward/group_zero_std_frac": 0.08055555820465088, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14985351860523224, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14985351860523224, "signal/advantage_abs_mean": 0.2284111499786377, "signal/advantage_pre_scale_abs_mean": 0.2284111499786377, "signal/advantage_pre_scale_std": 0.2946593701839447, "signal/advantage_std": 0.2946593701839447, "signal/brier_reward/centered_abs_mean": 0.2612148314714432, "signal/brier_reward/group_std_mean": 0.31739285588264465, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0326518539339304, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0326518539339304, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.186856010556221, "signal/confidence_uniqueness_reward/group_std_mean": 0.22159543633460999, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023357001319527625, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023357001319527625, "signal/format_reward/centered_abs_mean": 0.02544487789273262, "signal/format_reward/group_std_mean": 0.05656049475073814, "signal/format_reward/group_zero_std_frac": 0.7361111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01272243894636631, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01272243894636631, "signal/frontier_aurc_reward/centered_abs_mean": 0.11768018077127636, "signal/frontier_aurc_reward/group_std_mean": 0.14374305196106435, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0021064750850200652, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0021064750850200652, "signal/frontier_coverage_1/centered_abs_mean": 0.13446774668991565, "signal/frontier_coverage_1/group_std_mean": 0.17186392471194267, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024069725011941047, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024069725011941047, "signal/frontier_coverage_10/centered_abs_mean": 0.13446774668991565, "signal/frontier_coverage_10/group_std_mean": 0.17186392471194267, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024069725011941047, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024069725011941047, "signal/frontier_coverage_15/centered_abs_mean": 0.13446774668991565, "signal/frontier_coverage_15/group_std_mean": 0.17186392471194267, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024069725011941047, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024069725011941047, "signal/frontier_coverage_20/centered_abs_mean": 0.13446774668991565, "signal/frontier_coverage_20/group_std_mean": 0.17186392471194267, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024069725011941047, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024069725011941047, "signal/frontier_coverage_25/centered_abs_mean": 0.13446774668991565, "signal/frontier_coverage_25/group_std_mean": 0.17186392471194267, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024069725011941047, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024069725011941047, "signal/frontier_coverage_5/centered_abs_mean": 0.13446774668991565, "signal/frontier_coverage_5/group_std_mean": 0.17186392471194267, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024069725011941047, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024069725011941047, "signal/frontier_ece_reward/centered_abs_mean": 0.2141528308391571, "signal/frontier_ece_reward/group_std_mean": 0.2606072276830673, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.026769103854894637, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.026769103854894637, "step": 20 }, { "calibration/aurc": 0.36235694806855867, "calibration/batch_distribution_entropy": 0.5577564400694477, "calibration/buffer_distribution_entropy": 0.3620251114970633, "calibration/confidence_entropy": 0.3781988685889721, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.020526315789473684, "calibration/coverage@15%": 0.020526315789473684, "calibration/coverage@20%": 0.07789473684210527, "calibration/coverage@25%": 0.1470138210247103, "calibration/coverage@30%": 0.39850523935370885, "calibration/coverage@5%": 0.0, "calibration/ece": 0.27206986145822254, "calibration/mean_confidence": 0.8404789311929516, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00980902777777779, "completions/max_length": 3977.8, "completions/max_terminated_length": 3977.8, "completions/mean_length": 482.4750061035156, "completions/mean_terminated_length": 487.28203735351565, "completions/min_length": 0.0, "completions/min_terminated_length": 79.6, "epoch": 0.05999925000937488, "grad_norm": 0.020144827663898468, "learning_rate": 2.9761904761904763e-06, "loss": -0.0045, "num_tokens": 42300989.0, "reward": 0.9261262536048889, "reward_std": 0.2307106077671051, "rewards/accuracy_reward": 0.5281249940395355, "rewards/brier_reward": 0.6501283884048462, "rewards/confidence_uniqueness_reward": 0.6791275620460511, "rewards/format_reward": 0.9868055582046509, "rewards/frontier_aurc_reward": -0.004348812019452452, "rewards/frontier_coverage_1": 0.00460605913103791, "rewards/frontier_coverage_10": 0.00460605913103791, "rewards/frontier_coverage_15": 0.00460605913103791, "rewards/frontier_coverage_20": 0.00460605913103791, "rewards/frontier_coverage_25": 0.00460605913103791, "rewards/frontier_coverage_5": 0.00460605913103791, "rewards/frontier_ece_reward": 0.01669727308326401, "signal/accuracy_reward/centered_abs_mean": 0.2728081583976746, "signal/accuracy_reward/group_std_mean": 0.33785536885261536, "signal/accuracy_reward/group_zero_std_frac": 0.12222222238779068, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1364040791988373, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1364040791988373, "signal/advantage_abs_mean": 0.1812939614057541, "signal/advantage_pre_scale_abs_mean": 0.1812939614057541, "signal/advantage_pre_scale_std": 0.24797289669513703, "signal/advantage_std": 0.24797289669513703, "signal/brier_reward/centered_abs_mean": 0.21097786724567413, "signal/brier_reward/group_std_mean": 0.2636861175298691, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026372233405709267, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.026372233405709267, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1371775045990944, "signal/confidence_uniqueness_reward/group_std_mean": 0.1669593095779419, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0171471880748868, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0171471880748868, "signal/format_reward/centered_abs_mean": 0.023567708767950536, "signal/format_reward/group_std_mean": 0.047248493134975436, "signal/format_reward/group_zero_std_frac": 0.7972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011783854383975268, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011783854383975268, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028534052427858113, "signal/frontier_aurc_reward/group_std_mean": 0.004276081500574946, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1075952796963975e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1075952796963975e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.03978681042790413, "signal/frontier_coverage_1/group_std_mean": 0.06365430131554603, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0007121838862076402, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0007121838862076402, "signal/frontier_coverage_10/centered_abs_mean": 0.03978681042790413, "signal/frontier_coverage_10/group_std_mean": 0.06365430131554603, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0007121838862076402, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007121838862076402, "signal/frontier_coverage_15/centered_abs_mean": 0.03978681042790413, "signal/frontier_coverage_15/group_std_mean": 0.06365430131554603, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007121838862076402, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007121838862076402, "signal/frontier_coverage_20/centered_abs_mean": 0.03978681042790413, "signal/frontier_coverage_20/group_std_mean": 0.06365430131554603, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007121838862076402, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007121838862076402, "signal/frontier_coverage_25/centered_abs_mean": 0.03978681042790413, "signal/frontier_coverage_25/group_std_mean": 0.06365430131554603, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007121838862076402, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007121838862076402, "signal/frontier_coverage_5/centered_abs_mean": 0.03978681042790413, "signal/frontier_coverage_5/group_std_mean": 0.06365430131554603, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0007121838862076402, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0007121838862076402, "signal/frontier_ece_reward/centered_abs_mean": 0.1267393171787262, "signal/frontier_ece_reward/group_std_mean": 0.1571869283914566, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.015842414647340774, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.015842414647340774, "step": 25 }, { "calibration/aurc": 0.27921286600632367, "calibration/batch_distribution_entropy": 0.6577298107673348, "calibration/buffer_distribution_entropy": 0.44022902249815105, "calibration/confidence_entropy": 0.47141779391590316, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.007065217391304347, "calibration/coverage@15%": 0.08577783589696426, "calibration/coverage@20%": 0.1485360821538392, "calibration/coverage@25%": 0.23173721030472935, "calibration/coverage@30%": 0.5571452261540231, "calibration/coverage@5%": 0.0, "calibration/ece": 0.13807863251820546, "calibration/mean_confidence": 0.786612541696696, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017187499999999977, "completions/max_length": 4053.8, "completions/max_terminated_length": 4053.8, "completions/mean_length": 555.9796997070313, "completions/mean_terminated_length": 565.7698974609375, "completions/min_length": 0.0, "completions/min_terminated_length": 118.4, "epoch": 0.07199910001124986, "grad_norm": 0.0005177477723918855, "learning_rate": 3.5714285714285718e-06, "loss": -0.011, "num_tokens": 51815795.0, "reward": 0.9665445923805237, "reward_std": 0.20279234647750854, "rewards/accuracy_reward": 0.5907986044883728, "rewards/brier_reward": 0.7121957659721374, "rewards/confidence_uniqueness_reward": 0.7083804368972778, "rewards/format_reward": 0.9802083253860474, "rewards/frontier_aurc_reward": -0.0033660000655800102, "rewards/frontier_coverage_1": -0.005025790445506573, "rewards/frontier_coverage_10": -0.005025790445506573, "rewards/frontier_coverage_15": -0.005025790445506573, "rewards/frontier_coverage_20": -0.005025790445506573, "rewards/frontier_coverage_25": -0.005025790445506573, "rewards/frontier_coverage_5": -0.005025790445506573, "rewards/frontier_ece_reward": 0.032553022354841234, "signal/accuracy_reward/centered_abs_mean": 0.2300238698720932, "signal/accuracy_reward/group_std_mean": 0.2947371512651443, "signal/accuracy_reward/group_zero_std_frac": 0.18888889104127884, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1150119349360466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1150119349360466, "signal/advantage_abs_mean": 0.15309852957725525, "signal/advantage_pre_scale_abs_mean": 0.15309852957725525, "signal/advantage_pre_scale_std": 0.2273882269859314, "signal/advantage_std": 0.2273882269859314, "signal/brier_reward/centered_abs_mean": 0.16554278135299683, "signal/brier_reward/group_std_mean": 0.21267394721508026, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020692847669124603, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020692847669124603, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.11078131943941116, "signal/confidence_uniqueness_reward/group_std_mean": 0.14092794060707092, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013847664929926395, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013847664929926395, "signal/format_reward/centered_abs_mean": 0.03274739608168602, "signal/format_reward/group_std_mean": 0.06030413955450058, "signal/format_reward/group_zero_std_frac": 0.7555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01637369804084301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01637369804084301, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018913287669420243, "signal/frontier_aurc_reward/group_std_mean": 0.0029469260945916174, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.385478412383236e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.385478412383236e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.05050327777862549, "signal/frontier_coverage_1/group_std_mean": 0.07173062860965729, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0009040086762979627, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0009040086762979627, "signal/frontier_coverage_10/centered_abs_mean": 0.05050327777862549, "signal/frontier_coverage_10/group_std_mean": 0.07173062860965729, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0009040086762979627, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009040086762979627, "signal/frontier_coverage_15/centered_abs_mean": 0.05050327777862549, "signal/frontier_coverage_15/group_std_mean": 0.07173062860965729, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009040086762979627, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009040086762979627, "signal/frontier_coverage_20/centered_abs_mean": 0.05050327777862549, "signal/frontier_coverage_20/group_std_mean": 0.07173062860965729, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009040086762979627, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009040086762979627, "signal/frontier_coverage_25/centered_abs_mean": 0.05050327777862549, "signal/frontier_coverage_25/group_std_mean": 0.07173062860965729, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009040086762979627, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009040086762979627, "signal/frontier_coverage_5/centered_abs_mean": 0.05050327777862549, "signal/frontier_coverage_5/group_std_mean": 0.07173062860965729, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0009040086762979627, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0009040086762979627, "signal/frontier_ece_reward/centered_abs_mean": 0.08041608110070228, "signal/frontier_ece_reward/group_std_mean": 0.10180892795324326, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.010052010137587785, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.010052010137587785, "step": 30 }, { "calibration/aurc": 0.24546937930520882, "calibration/batch_distribution_entropy": 0.7074476039153248, "calibration/buffer_distribution_entropy": 0.5221484411007967, "calibration/confidence_entropy": 0.5234833271299201, "calibration/coverage@0%": 0.003183023872679045, "calibration/coverage@1%": 0.003183023872679045, "calibration/coverage@10%": 0.022281167108753316, "calibration/coverage@15%": 0.13367375062180317, "calibration/coverage@20%": 0.2989711554240587, "calibration/coverage@25%": 0.547051785170009, "calibration/coverage@30%": 0.8145723684210526, "calibration/coverage@5%": 0.022281167108753316, "calibration/ece": 0.09596886211635568, "calibration/mean_confidence": 0.7358937641480917, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01571180555555558, "completions/max_length": 4022.0, "completions/max_terminated_length": 4022.0, "completions/mean_length": 609.8512084960937, "completions/mean_terminated_length": 619.6409912109375, "completions/min_length": 0.0, "completions/min_terminated_length": 151.6, "epoch": 0.08399895001312484, "grad_norm": 0.0005236866418272257, "learning_rate": 4.166666666666667e-06, "loss": -0.0102, "num_tokens": 61918721.0, "reward": 0.9899388790130615, "reward_std": 0.1820593684911728, "rewards/accuracy_reward": 0.6299479126930236, "rewards/brier_reward": 0.7458477735519409, "rewards/confidence_uniqueness_reward": 0.7157063841819763, "rewards/format_reward": 0.98046875, "rewards/frontier_aurc_reward": -0.002861540112644434, "rewards/frontier_coverage_1": -0.013961865846067668, "rewards/frontier_coverage_10": -0.013961865846067668, "rewards/frontier_coverage_15": -0.013961865846067668, "rewards/frontier_coverage_20": -0.013961865846067668, "rewards/frontier_coverage_25": -0.013961865846067668, "rewards/frontier_coverage_5": -0.013961865846067668, "rewards/frontier_ece_reward": 0.02869575172662735, "signal/accuracy_reward/centered_abs_mean": 0.21008571982383728, "signal/accuracy_reward/group_std_mean": 0.2665324449539185, "signal/accuracy_reward/group_zero_std_frac": 0.2805555611848831, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10504285991191864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10504285991191864, "signal/advantage_abs_mean": 0.13846542537212372, "signal/advantage_pre_scale_abs_mean": 0.13846542537212372, "signal/advantage_pre_scale_std": 0.20890699625015258, "signal/advantage_std": 0.20890699625015258, "signal/brier_reward/centered_abs_mean": 0.14192103445529938, "signal/brier_reward/group_std_mean": 0.1835268259048462, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017740129306912423, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017740129306912423, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1185295969247818, "signal/confidence_uniqueness_reward/group_std_mean": 0.14487815797328948, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014816199615597724, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014816199615597724, "signal/format_reward/centered_abs_mean": 0.03253580778837204, "signal/format_reward/group_std_mean": 0.05597815439105034, "signal/format_reward/group_zero_std_frac": 0.7888888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01626790389418602, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01626790389418602, "signal/frontier_aurc_reward/centered_abs_mean": 0.001540156383998692, "signal/frontier_aurc_reward/group_std_mean": 0.0023954100906848907, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.756879803200718e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.756879803200718e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.06873494014143944, "signal/frontier_coverage_1/group_std_mean": 0.09156568795442581, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012303554220125079, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012303554220125079, "signal/frontier_coverage_10/centered_abs_mean": 0.06873494014143944, "signal/frontier_coverage_10/group_std_mean": 0.09156568795442581, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012303554220125079, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012303554220125079, "signal/frontier_coverage_15/centered_abs_mean": 0.06873494014143944, "signal/frontier_coverage_15/group_std_mean": 0.09156568795442581, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012303554220125079, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012303554220125079, "signal/frontier_coverage_20/centered_abs_mean": 0.06873494014143944, "signal/frontier_coverage_20/group_std_mean": 0.09156568795442581, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012303554220125079, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012303554220125079, "signal/frontier_coverage_25/centered_abs_mean": 0.06873494014143944, "signal/frontier_coverage_25/group_std_mean": 0.09156568795442581, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012303554220125079, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012303554220125079, "signal/frontier_coverage_5/centered_abs_mean": 0.06873494014143944, "signal/frontier_coverage_5/group_std_mean": 0.09156568795442581, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012303554220125079, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012303554220125079, "signal/frontier_ece_reward/centered_abs_mean": 0.05747309401631355, "signal/frontier_ece_reward/group_std_mean": 0.074637171626091, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007184136752039194, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007184136752039194, "step": 35 }, { "calibration/aurc": 0.27279581724387775, "calibration/batch_distribution_entropy": 0.7161346190573996, "calibration/buffer_distribution_entropy": 0.5831439661286104, "calibration/confidence_entropy": 0.5275154650697746, "calibration/coverage@0%": 0.004199475065616798, "calibration/coverage@1%": 0.004199475065616798, "calibration/coverage@10%": 0.023622047244094488, "calibration/coverage@15%": 0.06826086053227515, "calibration/coverage@20%": 0.17686038278103913, "calibration/coverage@25%": 0.3879944798344658, "calibration/coverage@30%": 0.5670690424419453, "calibration/coverage@5%": 0.004199475065616798, "calibration/ece": 0.09923312232529506, "calibration/mean_confidence": 0.7262206563584646, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014930555555555558, "completions/max_length": 3704.8, "completions/max_terminated_length": 3704.8, "completions/mean_length": 634.3372436523438, "completions/mean_terminated_length": 643.9880004882813, "completions/min_length": 0.0, "completions/min_terminated_length": 165.8, "epoch": 0.09599880001499982, "grad_norm": 0.0005042441189289093, "learning_rate": 4.761904761904762e-06, "loss": -0.0111, "num_tokens": 72345806.0, "reward": 1.0059186458587646, "reward_std": 0.17311942875385283, "rewards/accuracy_reward": 0.6474826335906982, "rewards/brier_reward": 0.7549214363098145, "rewards/confidence_uniqueness_reward": 0.7580878973007202, "rewards/format_reward": 0.9828124880790711, "rewards/frontier_aurc_reward": -0.002668565092608333, "rewards/frontier_coverage_1": -0.018375001149252057, "rewards/frontier_coverage_10": -0.018375001149252057, "rewards/frontier_coverage_15": -0.018375001149252057, "rewards/frontier_coverage_20": -0.018375001149252057, "rewards/frontier_coverage_25": -0.018375001149252057, "rewards/frontier_coverage_5": -0.018375001149252057, "rewards/frontier_ece_reward": 0.029329166933894157, "signal/accuracy_reward/centered_abs_mean": 0.19369032084941865, "signal/accuracy_reward/group_std_mean": 0.25328629910945893, "signal/accuracy_reward/group_zero_std_frac": 0.2972222208976746, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09684516042470932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09684516042470932, "signal/advantage_abs_mean": 0.1273159146308899, "signal/advantage_pre_scale_abs_mean": 0.1273159146308899, "signal/advantage_pre_scale_std": 0.2019648015499115, "signal/advantage_std": 0.2019648015499115, "signal/brier_reward/centered_abs_mean": 0.141173791885376, "signal/brier_reward/group_std_mean": 0.18503097891807557, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017646723985672, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017646723985672, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.10431279838085175, "signal/confidence_uniqueness_reward/group_std_mean": 0.13210797309875488, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013039099797606469, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013039099797606469, "signal/format_reward/centered_abs_mean": 0.02906901091337204, "signal/format_reward/group_std_mean": 0.05360684543848038, "signal/format_reward/group_zero_std_frac": 0.7833333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01453450545668602, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01453450545668602, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019563521724194287, "signal/frontier_aurc_reward/group_std_mean": 0.00321835745126009, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5018703420064413e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5018703420064413e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.08069958090782166, "signal/frontier_coverage_1/group_std_mean": 0.10802106261253357, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001444522407837212, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001444522407837212, "signal/frontier_coverage_10/centered_abs_mean": 0.08069958090782166, "signal/frontier_coverage_10/group_std_mean": 0.10802106261253357, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001444522407837212, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001444522407837212, "signal/frontier_coverage_15/centered_abs_mean": 0.08069958090782166, "signal/frontier_coverage_15/group_std_mean": 0.10802106261253357, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001444522407837212, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001444522407837212, "signal/frontier_coverage_20/centered_abs_mean": 0.08069958090782166, "signal/frontier_coverage_20/group_std_mean": 0.10802106261253357, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001444522407837212, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001444522407837212, "signal/frontier_coverage_25/centered_abs_mean": 0.08069958090782166, "signal/frontier_coverage_25/group_std_mean": 0.10802106261253357, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001444522407837212, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001444522407837212, "signal/frontier_coverage_5/centered_abs_mean": 0.08069958090782166, "signal/frontier_coverage_5/group_std_mean": 0.10802106261253357, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001444522407837212, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001444522407837212, "signal/frontier_ece_reward/centered_abs_mean": 0.05297911018133163, "signal/frontier_ece_reward/group_std_mean": 0.07072616964578629, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006622388772666454, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006622388772666454, "step": 40 }, { "calibration/aurc": 0.21362556514859965, "calibration/batch_distribution_entropy": 0.7249410130483167, "calibration/buffer_distribution_entropy": 0.6183385204926428, "calibration/confidence_entropy": 0.4615105516463262, "calibration/coverage@0%": 0.015706806282722512, "calibration/coverage@1%": 0.015706806282722512, "calibration/coverage@10%": 0.12766332802185293, "calibration/coverage@15%": 0.26664864557250173, "calibration/coverage@20%": 0.44956878151185337, "calibration/coverage@25%": 0.7303769022780143, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.015706806282722512, "calibration/ece": 0.10484668118363438, "calibration/mean_confidence": 0.7590440665093695, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017013888888888884, "completions/max_length": 3743.2, "completions/max_terminated_length": 3743.2, "completions/mean_length": 654.5295166015625, "completions/mean_terminated_length": 665.859033203125, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.1079986500168748, "grad_norm": 0.00047160135000012815, "learning_rate": 4.909638554216868e-06, "loss": -0.0147, "num_tokens": 83021250.0, "reward": 1.0147064447402954, "reward_std": 0.1708086371421814, "rewards/accuracy_reward": 0.6501736164093017, "rewards/brier_reward": 0.7591644525527954, "rewards/confidence_uniqueness_reward": 0.8012210249900817, "rewards/format_reward": 0.9823784589767456, "rewards/frontier_aurc_reward": -0.0025879590306431056, "rewards/frontier_coverage_1": -0.010252609569579362, "rewards/frontier_coverage_10": -0.010252609569579362, "rewards/frontier_coverage_15": -0.010252609569579362, "rewards/frontier_coverage_20": -0.010252609569579362, "rewards/frontier_coverage_25": -0.010252609569579362, "rewards/frontier_coverage_5": -0.010252609569579362, "rewards/frontier_ece_reward": 0.036237184703350064, "signal/accuracy_reward/centered_abs_mean": 0.19266493022441863, "signal/accuracy_reward/group_std_mean": 0.2547257900238037, "signal/accuracy_reward/group_zero_std_frac": 0.2805555611848831, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09633246511220932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09633246511220932, "signal/advantage_abs_mean": 0.12555780559778212, "signal/advantage_pre_scale_abs_mean": 0.12555780559778212, "signal/advantage_pre_scale_std": 0.19953626692295073, "signal/advantage_std": 0.19953626692295073, "signal/brier_reward/centered_abs_mean": 0.14807810485363007, "signal/brier_reward/group_std_mean": 0.19255775809288025, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01850976310670376, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01850976310670376, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0964215949177742, "signal/confidence_uniqueness_reward/group_std_mean": 0.12510152906179428, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012052699364721775, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012052699364721775, "signal/format_reward/centered_abs_mean": 0.02882486991584301, "signal/format_reward/group_std_mean": 0.05173756778240204, "signal/format_reward/group_zero_std_frac": 0.794444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014412434957921504, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014412434957921504, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028790227603167295, "signal/frontier_aurc_reward/group_std_mean": 0.004534664563834667, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.153450401849113e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.153450401849113e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.08745122700929642, "signal/frontier_coverage_1/group_std_mean": 0.11857426017522812, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015653769718483091, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015653769718483091, "signal/frontier_coverage_10/centered_abs_mean": 0.08745122700929642, "signal/frontier_coverage_10/group_std_mean": 0.11857426017522812, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015653769718483091, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015653769718483091, "signal/frontier_coverage_15/centered_abs_mean": 0.08745122700929642, "signal/frontier_coverage_15/group_std_mean": 0.11857426017522812, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015653769718483091, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015653769718483091, "signal/frontier_coverage_20/centered_abs_mean": 0.08745122700929642, "signal/frontier_coverage_20/group_std_mean": 0.11857426017522812, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015653769718483091, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015653769718483091, "signal/frontier_coverage_25/centered_abs_mean": 0.08745122700929642, "signal/frontier_coverage_25/group_std_mean": 0.11857426017522812, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015653769718483091, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015653769718483091, "signal/frontier_coverage_5/centered_abs_mean": 0.08745122700929642, "signal/frontier_coverage_5/group_std_mean": 0.11857426017522812, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015653769718483091, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015653769718483091, "signal/frontier_ece_reward/centered_abs_mean": 0.05844959244132042, "signal/frontier_ece_reward/group_std_mean": 0.07468613833189011, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007306199055165052, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007306199055165052, "step": 45 }, { "calibration/aurc": 0.3984856338347365, "calibration/batch_distribution_entropy": 0.7660177852564631, "calibration/buffer_distribution_entropy": 0.6392927797521206, "calibration/confidence_entropy": 0.4476935003558987, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.02168021680216802, "calibration/coverage@5%": 0.0, "calibration/ece": 0.22976067826983196, "calibration/mean_confidence": 0.7545236404911386, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015885416666666673, "completions/max_length": 3047.4, "completions/max_terminated_length": 3047.4, "completions/mean_length": 653.1490600585937, "completions/mean_terminated_length": 663.627978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 182.2, "epoch": 0.11999850001874976, "grad_norm": 0.0005696824518963695, "learning_rate": 4.759036144578314e-06, "loss": -0.0146, "num_tokens": 93643127.0, "reward": 1.014067542552948, "reward_std": 0.163968026638031, "rewards/accuracy_reward": 0.6374131798744201, "rewards/brier_reward": 0.7471580624580383, "rewards/confidence_uniqueness_reward": 0.8459334373474121, "rewards/format_reward": 0.9841145873069763, "rewards/frontier_aurc_reward": -0.002862738911062479, "rewards/frontier_coverage_1": -0.0022450896329246463, "rewards/frontier_coverage_10": -0.0022450896329246463, "rewards/frontier_coverage_15": -0.0022450896329246463, "rewards/frontier_coverage_20": -0.0022450896329246463, "rewards/frontier_coverage_25": -0.0022450896329246463, "rewards/frontier_coverage_5": -0.0022450896329246463, "rewards/frontier_ece_reward": 0.03567677363753319, "signal/accuracy_reward/centered_abs_mean": 0.181884765625, "signal/accuracy_reward/group_std_mean": 0.24014606773853303, "signal/accuracy_reward/group_zero_std_frac": 0.3194444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0909423828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0909423828125, "signal/advantage_abs_mean": 0.12030979841947556, "signal/advantage_pre_scale_abs_mean": 0.12030979841947556, "signal/advantage_pre_scale_std": 0.19647813737392425, "signal/advantage_std": 0.19647813737392425, "signal/brier_reward/centered_abs_mean": 0.15161194503307343, "signal/brier_reward/group_std_mean": 0.1975580185651779, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01895149312913418, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01895149312913418, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08682139664888382, "signal/confidence_uniqueness_reward/group_std_mean": 0.11508260518312455, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010852674581110477, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010852674581110477, "signal/format_reward/centered_abs_mean": 0.02594943605363369, "signal/format_reward/group_std_mean": 0.048407307267189024, "signal/format_reward/group_zero_std_frac": 0.7972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012974718026816845, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012974718026816845, "signal/frontier_aurc_reward/centered_abs_mean": 0.0035359882283955814, "signal/frontier_aurc_reward/group_std_mean": 0.005427685286849737, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.329418683890254e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.329418683890254e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.08081827610731125, "signal/frontier_coverage_1/group_std_mean": 0.11278729438781739, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014466470805928112, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014466470805928112, "signal/frontier_coverage_10/centered_abs_mean": 0.08081827610731125, "signal/frontier_coverage_10/group_std_mean": 0.11278729438781739, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014466470805928112, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014466470805928112, "signal/frontier_coverage_15/centered_abs_mean": 0.08081827610731125, "signal/frontier_coverage_15/group_std_mean": 0.11278729438781739, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014466470805928112, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014466470805928112, "signal/frontier_coverage_20/centered_abs_mean": 0.08081827610731125, "signal/frontier_coverage_20/group_std_mean": 0.11278729438781739, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014466470805928112, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014466470805928112, "signal/frontier_coverage_25/centered_abs_mean": 0.08081827610731125, "signal/frontier_coverage_25/group_std_mean": 0.11278729438781739, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014466470805928112, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014466470805928112, "signal/frontier_coverage_5/centered_abs_mean": 0.08081827610731125, "signal/frontier_coverage_5/group_std_mean": 0.11278729438781739, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014466470805928112, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014466470805928112, "signal/frontier_ece_reward/centered_abs_mean": 0.05788221508264542, "signal/frontier_ece_reward/group_std_mean": 0.07291264832019806, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007235276885330677, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007235276885330677, "step": 50 }, { "epoch": 0.11999850001874976, "eval_calibration/aurc": 0.22528452463011495, "eval_calibration/batch_distribution_entropy": 0.7327676302222469, "eval_calibration/buffer_distribution_entropy": 0.6531455892362769, "eval_calibration/confidence_entropy": 0.45147091813373913, "eval_calibration/coverage@0%": 0.12339829749103944, "eval_calibration/coverage@1%": 0.12339829749103944, "eval_calibration/coverage@10%": 0.2075716845878136, "eval_calibration/coverage@15%": 0.2720990143369176, "eval_calibration/coverage@20%": 0.4371975806451613, "eval_calibration/coverage@25%": 0.6719758064516129, "eval_calibration/coverage@30%": 0.9375, "eval_calibration/coverage@5%": 0.12339829749103944, "eval_calibration/ece": 0.1648209544670118, "eval_calibration/mean_confidence": 0.7559882185052436, "eval_completions/clipped_ratio": 0.018229166666666668, "eval_completions/max_length": 2276.0, "eval_completions/max_terminated_length": 2276.0, "eval_completions/mean_length": 635.2345784505209, "eval_completions/mean_terminated_length": 646.9802856445312, "eval_completions/min_length": 51.666666666666664, "eval_completions/min_terminated_length": 214.5, "eval_loss": 0.0, "eval_num_tokens": 93643127.0, "eval_reward": 1.0190295179684956, "eval_reward_std": 0.29277849197387695, "eval_rewards/accuracy_reward": 0.65625, "eval_rewards/brier_reward": 0.7608515123526255, "eval_rewards/confidence_uniqueness_reward": 0.8176768521467844, "eval_rewards/format_reward": 0.980034718910853, "eval_rewards/frontier_aurc_reward": -0.002395169634837657, "eval_rewards/frontier_coverage_1": -0.006012833837303333, "eval_rewards/frontier_coverage_10": -0.006012833837303333, "eval_rewards/frontier_coverage_15": -0.006012833837303333, "eval_rewards/frontier_coverage_20": -0.006012833837303333, "eval_rewards/frontier_coverage_25": -0.006012833837303333, "eval_rewards/frontier_coverage_5": -0.006012833837303333, "eval_rewards/frontier_ece_reward": 0.034077832475304604, "eval_runtime": 205.5337, "eval_samples_per_second": 4.865, "eval_signal/accuracy_reward/centered_abs_mean": 0.4380425264437993, "eval_signal/accuracy_reward/group_std_mean": 0.4743858923514684, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21902126322189966, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21902126322189966, "eval_signal/advantage_abs_mean": 0.2553912376364072, "eval_signal/advantage_pre_scale_abs_mean": 0.2553912376364072, "eval_signal/advantage_pre_scale_std": 0.29115622242291767, "eval_signal/advantage_std": 0.29115622242291767, "eval_signal/brier_reward/centered_abs_mean": 0.22910910844802856, "eval_signal/brier_reward/group_std_mean": 0.28499897321065265, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02863863855600357, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02863863855600357, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.10962619632482529, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.15229293455680212, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01370327454060316, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01370327454060316, "eval_signal/format_reward/centered_abs_mean": 0.03803168454517921, "eval_signal/format_reward/group_std_mean": 0.09500421459476154, "eval_signal/format_reward/group_zero_std_frac": 0.5277777910232544, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.019015842272589605, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.019015842272589605, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0033296199593072138, "eval_signal/frontier_aurc_reward/group_std_mean": 0.005719099193811417, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.9600197346298955e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.9600197346298955e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.10264075919985771, "eval_signal/frontier_coverage_1/group_std_mean": 0.16208957880735397, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001837269403040409, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001837269403040409, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.10264075919985771, "eval_signal/frontier_coverage_10/group_std_mean": 0.16208957880735397, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001837269403040409, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001837269403040409, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.10264075919985771, "eval_signal/frontier_coverage_15/group_std_mean": 0.16208957880735397, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001837269403040409, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001837269403040409, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.10264075919985771, "eval_signal/frontier_coverage_20/group_std_mean": 0.16208957880735397, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001837269403040409, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001837269403040409, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.10264075919985771, "eval_signal/frontier_coverage_25/group_std_mean": 0.16208957880735397, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001837269403040409, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001837269403040409, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.10264075919985771, "eval_signal/frontier_coverage_5/group_std_mean": 0.16208957880735397, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001837269403040409, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001837269403040409, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.06874044549961884, "eval_signal/frontier_ece_reward/group_std_mean": 0.08980573217074077, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008592555687452355, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008592555687452355, "eval_steps_per_second": 0.029, "step": 50 }, { "calibration/aurc": 0.30905254136426186, "calibration/batch_distribution_entropy": 0.8075596443590124, "calibration/buffer_distribution_entropy": 0.6633267710604588, "calibration/confidence_entropy": 0.49110166338550343, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.3168765708000028, "calibration/coverage@25%": 0.5018372703412074, "calibration/coverage@30%": 0.5417322834645669, "calibration/coverage@5%": 0.0, "calibration/ece": 0.15189953369437614, "calibration/mean_confidence": 0.7286333664825811, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015885416666666673, "completions/max_length": 3439.6, "completions/max_terminated_length": 3439.6, "completions/mean_length": 658.5401977539062, "completions/mean_terminated_length": 669.3446044921875, "completions/min_length": 0.0, "completions/min_terminated_length": 142.2, "epoch": 0.13199835002062474, "grad_norm": 0.00043861696030944586, "learning_rate": 4.60843373493976e-06, "loss": -0.0138, "num_tokens": 104310086.0, "reward": 1.0262351274490356, "reward_std": 0.15907953977584838, "rewards/accuracy_reward": 0.6447048544883728, "rewards/brier_reward": 0.7629892230033875, "rewards/confidence_uniqueness_reward": 0.9056627631187439, "rewards/format_reward": 0.9841145873069763, "rewards/frontier_aurc_reward": -0.002367356652393937, "rewards/frontier_coverage_1": -0.0021971354028210042, "rewards/frontier_coverage_10": -0.0021971354028210042, "rewards/frontier_coverage_15": -0.0021971354028210042, "rewards/frontier_coverage_20": -0.0021971354028210042, "rewards/frontier_coverage_25": -0.0021971354028210042, "rewards/frontier_coverage_5": -0.0021971354028210042, "rewards/frontier_ece_reward": 0.02817784361541271, "signal/accuracy_reward/centered_abs_mean": 0.18217773735523224, "signal/accuracy_reward/group_std_mean": 0.23912697434425353, "signal/accuracy_reward/group_zero_std_frac": 0.325, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09108886867761612, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09108886867761612, "signal/advantage_abs_mean": 0.11837138533592224, "signal/advantage_pre_scale_abs_mean": 0.11837138533592224, "signal/advantage_pre_scale_std": 0.1914423018693924, "signal/advantage_std": 0.1914423018693924, "signal/brier_reward/centered_abs_mean": 0.1487715631723404, "signal/brier_reward/group_std_mean": 0.1938774347305298, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01859644539654255, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01859644539654255, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06237577944993973, "signal/confidence_uniqueness_reward/group_std_mean": 0.08584694117307663, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007796972431242466, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007796972431242466, "signal/format_reward/centered_abs_mean": 0.0235948346555233, "signal/format_reward/group_std_mean": 0.040967592224478724, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01179741732776165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01179741732776165, "signal/frontier_aurc_reward/centered_abs_mean": 0.002605421328917146, "signal/frontier_aurc_reward/group_std_mean": 0.004233243642374873, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.6637040941277516e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.6637040941277516e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.09448704570531845, "signal/frontier_coverage_1/group_std_mean": 0.13038320094347, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001691318047232926, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001691318047232926, "signal/frontier_coverage_10/centered_abs_mean": 0.09448704570531845, "signal/frontier_coverage_10/group_std_mean": 0.13038320094347, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001691318047232926, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001691318047232926, "signal/frontier_coverage_15/centered_abs_mean": 0.09448704570531845, "signal/frontier_coverage_15/group_std_mean": 0.13038320094347, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001691318047232926, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001691318047232926, "signal/frontier_coverage_20/centered_abs_mean": 0.09448704570531845, "signal/frontier_coverage_20/group_std_mean": 0.13038320094347, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001691318047232926, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001691318047232926, "signal/frontier_coverage_25/centered_abs_mean": 0.09448704570531845, "signal/frontier_coverage_25/group_std_mean": 0.13038320094347, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001691318047232926, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001691318047232926, "signal/frontier_coverage_5/centered_abs_mean": 0.09448704570531845, "signal/frontier_coverage_5/group_std_mean": 0.13038320094347, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001691318047232926, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001691318047232926, "signal/frontier_ece_reward/centered_abs_mean": 0.04688545688986778, "signal/frontier_ece_reward/group_std_mean": 0.06142409965395927, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005860682111233473, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005860682111233473, "step": 55 }, { "calibration/aurc": 0.3345369982847062, "calibration/batch_distribution_entropy": 0.7979723488465243, "calibration/buffer_distribution_entropy": 0.6829944006341414, "calibration/confidence_entropy": 0.4384303562223697, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.011548556430446194, "calibration/coverage@15%": 0.2143167650918635, "calibration/coverage@20%": 0.342257217847769, "calibration/coverage@25%": 0.3931758530183727, "calibration/coverage@30%": 0.43368983957219254, "calibration/coverage@5%": 0.0, "calibration/ece": 0.19744679022940095, "calibration/mean_confidence": 0.7565729020638163, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01796875, "completions/max_length": 3591.8, "completions/max_terminated_length": 3591.8, "completions/mean_length": 652.4442749023438, "completions/mean_terminated_length": 664.4763916015625, "completions/min_length": 0.0, "completions/min_terminated_length": 170.8, "epoch": 0.14399820002249972, "grad_norm": 0.0004398068122100085, "learning_rate": 4.457831325301205e-06, "loss": -0.0164, "num_tokens": 114922820.0, "reward": 1.0199653863906861, "reward_std": 0.1711801379919052, "rewards/accuracy_reward": 0.6309895873069763, "rewards/brier_reward": 0.7476680040359497, "rewards/confidence_uniqueness_reward": 0.9258608222007751, "rewards/format_reward": 0.9817708253860473, "rewards/frontier_aurc_reward": -0.002807429013773799, "rewards/frontier_coverage_1": 0.004628715617582202, "rewards/frontier_coverage_10": 0.004628715617582202, "rewards/frontier_coverage_15": 0.004628715617582202, "rewards/frontier_coverage_20": 0.004628715617582202, "rewards/frontier_coverage_25": 0.004628715617582202, "rewards/frontier_coverage_5": 0.004628715617582202, "rewards/frontier_ece_reward": 0.03157777301967144, "signal/accuracy_reward/centered_abs_mean": 0.1884711354970932, "signal/accuracy_reward/group_std_mean": 0.2461162716150284, "signal/accuracy_reward/group_zero_std_frac": 0.31388889253139496, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0942355677485466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0942355677485466, "signal/advantage_abs_mean": 0.12647135853767394, "signal/advantage_pre_scale_abs_mean": 0.12647135853767394, "signal/advantage_pre_scale_std": 0.2025492161512375, "signal/advantage_std": 0.2025492161512375, "signal/brier_reward/centered_abs_mean": 0.16874447762966155, "signal/brier_reward/group_std_mean": 0.22043513357639313, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021093059703707694, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021093059703707694, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05148363336920738, "signal/confidence_uniqueness_reward/group_std_mean": 0.08058208972215652, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006435454171150923, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006435454171150923, "signal/format_reward/centered_abs_mean": 0.03021918386220932, "signal/format_reward/group_std_mean": 0.05589370355010033, "signal/format_reward/group_zero_std_frac": 0.7694444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01510959193110466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01510959193110466, "signal/frontier_aurc_reward/centered_abs_mean": 0.003583089355379343, "signal/frontier_aurc_reward/group_std_mean": 0.005511940456926823, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.413729715859517e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.413729715859517e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.09764359593391418, "signal/frontier_coverage_1/group_std_mean": 0.1448903352022171, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017478203400969506, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017478203400969506, "signal/frontier_coverage_10/centered_abs_mean": 0.09764359593391418, "signal/frontier_coverage_10/group_std_mean": 0.1448903352022171, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017478203400969506, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017478203400969506, "signal/frontier_coverage_15/centered_abs_mean": 0.09764359593391418, "signal/frontier_coverage_15/group_std_mean": 0.1448903352022171, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017478203400969506, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017478203400969506, "signal/frontier_coverage_20/centered_abs_mean": 0.09764359593391418, "signal/frontier_coverage_20/group_std_mean": 0.1448903352022171, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017478203400969506, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017478203400969506, "signal/frontier_coverage_25/centered_abs_mean": 0.09764359593391418, "signal/frontier_coverage_25/group_std_mean": 0.1448903352022171, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017478203400969506, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017478203400969506, "signal/frontier_coverage_5/centered_abs_mean": 0.09764359593391418, "signal/frontier_coverage_5/group_std_mean": 0.1448903352022171, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017478203400969506, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017478203400969506, "signal/frontier_ece_reward/centered_abs_mean": 0.05283080860972404, "signal/frontier_ece_reward/group_std_mean": 0.06684889793395996, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006603851076215505, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006603851076215505, "step": 60 }, { "calibration/aurc": 0.25178061436936255, "calibration/batch_distribution_entropy": 0.782224239763399, "calibration/buffer_distribution_entropy": 0.6979801492843933, "calibration/confidence_entropy": 0.38540589096164324, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.025, "calibration/coverage@15%": 0.326465196377979, "calibration/coverage@20%": 0.6091465609610507, "calibration/coverage@25%": 0.6931875417068663, "calibration/coverage@30%": 0.7474254742547426, "calibration/coverage@5%": 0.0, "calibration/ece": 0.16355479654311317, "calibration/mean_confidence": 0.7571751739178787, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017708333333333326, "completions/max_length": 3540.0, "completions/max_terminated_length": 3540.0, "completions/mean_length": 614.4053955078125, "completions/mean_terminated_length": 625.5040283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.1559980500243747, "grad_norm": 0.0004897533799521625, "learning_rate": 4.307228915662651e-06, "loss": -0.0152, "num_tokens": 125094818.0, "reward": 1.0328210592269897, "reward_std": 0.16226947605609893, "rewards/accuracy_reward": 0.6543402791023254, "rewards/brier_reward": 0.7571277022361755, "rewards/confidence_uniqueness_reward": 0.9207069873809814, "rewards/format_reward": 0.9821180462837219, "rewards/frontier_aurc_reward": -0.0027203528210520745, "rewards/frontier_coverage_1": 0.0034012388437986373, "rewards/frontier_coverage_10": 0.0034012388437986373, "rewards/frontier_coverage_15": 0.0034012388437986373, "rewards/frontier_coverage_20": 0.0034012388437986373, "rewards/frontier_coverage_25": 0.0034012388437986373, "rewards/frontier_coverage_5": 0.0034012388437986373, "rewards/frontier_ece_reward": 0.03636742420494556, "signal/accuracy_reward/centered_abs_mean": 0.16812065839767457, "signal/accuracy_reward/group_std_mean": 0.23046530783176422, "signal/accuracy_reward/group_zero_std_frac": 0.31111111044883727, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08406032919883728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08406032919883728, "signal/advantage_abs_mean": 0.11638958901166915, "signal/advantage_pre_scale_abs_mean": 0.11638958901166915, "signal/advantage_pre_scale_std": 0.19259226322174072, "signal/advantage_std": 0.19259226322174072, "signal/brier_reward/centered_abs_mean": 0.1747972458600998, "signal/brier_reward/group_std_mean": 0.2305249333381653, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021849655732512473, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021849655732512473, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.056257478147745135, "signal/confidence_uniqueness_reward/group_std_mean": 0.08205792903900147, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007032184768468142, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007032184768468142, "signal/format_reward/centered_abs_mean": 0.02969835065305233, "signal/format_reward/group_std_mean": 0.05096030831336975, "signal/format_reward/group_zero_std_frac": 0.8083333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014849175326526165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014849175326526165, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032653619535267355, "signal/frontier_aurc_reward/group_std_mean": 0.004880654439330101, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.844997795065865e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.844997795065865e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1121538296341896, "signal/frontier_coverage_1/group_std_mean": 0.16928686797618867, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002007553423754871, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002007553423754871, "signal/frontier_coverage_10/centered_abs_mean": 0.1121538296341896, "signal/frontier_coverage_10/group_std_mean": 0.16928686797618867, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002007553423754871, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002007553423754871, "signal/frontier_coverage_15/centered_abs_mean": 0.1121538296341896, "signal/frontier_coverage_15/group_std_mean": 0.16928686797618867, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002007553423754871, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002007553423754871, "signal/frontier_coverage_20/centered_abs_mean": 0.1121538296341896, "signal/frontier_coverage_20/group_std_mean": 0.16928686797618867, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002007553423754871, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002007553423754871, "signal/frontier_coverage_25/centered_abs_mean": 0.1121538296341896, "signal/frontier_coverage_25/group_std_mean": 0.16928686797618867, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002007553423754871, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002007553423754871, "signal/frontier_coverage_5/centered_abs_mean": 0.1121538296341896, "signal/frontier_coverage_5/group_std_mean": 0.16928686797618867, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002007553423754871, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002007553423754871, "signal/frontier_ece_reward/centered_abs_mean": 0.05328927487134934, "signal/frontier_ece_reward/group_std_mean": 0.06582499742507934, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006661159358918667, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006661159358918667, "step": 65 }, { "calibration/aurc": 0.2951920429356995, "calibration/batch_distribution_entropy": 0.6789171581871921, "calibration/buffer_distribution_entropy": 0.7060502188884911, "calibration/confidence_entropy": 0.3652801822043075, "calibration/coverage@0%": 0.016976127320954906, "calibration/coverage@1%": 0.016976127320954906, "calibration/coverage@10%": 0.11777188328912466, "calibration/coverage@15%": 0.13580901856763924, "calibration/coverage@20%": 0.15119363395225466, "calibration/coverage@25%": 0.2842619485873057, "calibration/coverage@30%": 0.5154877819661559, "calibration/coverage@5%": 0.070026525198939, "calibration/ece": 0.19455041095714468, "calibration/mean_confidence": 0.806789845714594, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020138888888888908, "completions/max_length": 3470.6, "completions/max_terminated_length": 3470.6, "completions/mean_length": 599.9177124023438, "completions/mean_terminated_length": 612.2438720703125, "completions/min_length": 0.0, "completions/min_terminated_length": 124.2, "epoch": 0.16799790002624967, "grad_norm": 0.00047923889360390604, "learning_rate": 4.156626506024097e-06, "loss": -0.0193, "num_tokens": 135084014.0, "reward": 1.017272448539734, "reward_std": 0.16175627410411836, "rewards/accuracy_reward": 0.6318576335906982, "rewards/brier_reward": 0.7436864018440247, "rewards/confidence_uniqueness_reward": 0.9008719086647033, "rewards/format_reward": 0.9797742962837219, "rewards/frontier_aurc_reward": -0.0031225522980093956, "rewards/frontier_coverage_1": 0.014106421242468059, "rewards/frontier_coverage_10": 0.014106421242468059, "rewards/frontier_coverage_15": 0.014106421242468059, "rewards/frontier_coverage_20": 0.014106421242468059, "rewards/frontier_coverage_25": 0.014106421242468059, "rewards/frontier_coverage_5": 0.014106421242468059, "rewards/frontier_ece_reward": 0.03542088866233826, "signal/accuracy_reward/centered_abs_mean": 0.1743109792470932, "signal/accuracy_reward/group_std_mean": 0.22547804117202758, "signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0871554896235466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0871554896235466, "signal/advantage_abs_mean": 0.12052754461765289, "signal/advantage_pre_scale_abs_mean": 0.12052754461765289, "signal/advantage_pre_scale_std": 0.20319488048553466, "signal/advantage_std": 0.20319488048553466, "signal/brier_reward/centered_abs_mean": 0.1648993283510208, "signal/brier_reward/group_std_mean": 0.2110469877719879, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0206124160438776, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0206124160438776, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06894725412130356, "signal/confidence_uniqueness_reward/group_std_mean": 0.097543103992939, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008618406765162945, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008618406765162945, "signal/format_reward/centered_abs_mean": 0.03365342915058136, "signal/format_reward/group_std_mean": 0.05752314925193787, "signal/format_reward/group_zero_std_frac": 0.7833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01682671457529068, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01682671457529068, "signal/frontier_aurc_reward/centered_abs_mean": 0.0033782635815441608, "signal/frontier_aurc_reward/group_std_mean": 0.004819054994732142, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.047091592336074e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.047091592336074e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.07730035781860352, "signal/frontier_coverage_1/group_std_mean": 0.11400771141052246, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013836764032021166, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013836764032021166, "signal/frontier_coverage_10/centered_abs_mean": 0.07730035781860352, "signal/frontier_coverage_10/group_std_mean": 0.11400771141052246, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013836764032021166, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013836764032021166, "signal/frontier_coverage_15/centered_abs_mean": 0.07730035781860352, "signal/frontier_coverage_15/group_std_mean": 0.11400771141052246, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013836764032021166, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013836764032021166, "signal/frontier_coverage_20/centered_abs_mean": 0.07730035781860352, "signal/frontier_coverage_20/group_std_mean": 0.11400771141052246, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013836764032021166, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013836764032021166, "signal/frontier_coverage_25/centered_abs_mean": 0.07730035781860352, "signal/frontier_coverage_25/group_std_mean": 0.11400771141052246, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013836764032021166, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013836764032021166, "signal/frontier_coverage_5/centered_abs_mean": 0.07730035781860352, "signal/frontier_coverage_5/group_std_mean": 0.11400771141052246, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013836764032021166, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013836764032021166, "signal/frontier_ece_reward/centered_abs_mean": 0.0494035005569458, "signal/frontier_ece_reward/group_std_mean": 0.060732795298099516, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006175437569618225, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006175437569618225, "step": 70 }, { "calibration/aurc": 0.23588326946851917, "calibration/batch_distribution_entropy": 0.7336437964325994, "calibration/buffer_distribution_entropy": 0.7058757308269206, "calibration/confidence_entropy": 0.4050379491719296, "calibration/coverage@0%": 0.0026345104155062935, "calibration/coverage@1%": 0.0026345104155062935, "calibration/coverage@10%": 0.15062862394804186, "calibration/coverage@15%": 0.24837791370120765, "calibration/coverage@20%": 0.5237080040614946, "calibration/coverage@25%": 0.6888860829670989, "calibration/coverage@30%": 0.7517615176151762, "calibration/coverage@5%": 0.0026345104155062935, "calibration/ece": 0.14133860252060226, "calibration/mean_confidence": 0.7841849798052773, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015538194444444441, "completions/max_length": 2772.4, "completions/max_terminated_length": 2772.4, "completions/mean_length": 612.7648559570313, "completions/mean_terminated_length": 622.4307373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 174.4, "epoch": 0.17999775002812465, "grad_norm": 0.0004477369075175375, "learning_rate": 4.006024096385543e-06, "loss": -0.0129, "num_tokens": 145207961.0, "reward": 1.0510570764541627, "reward_std": 0.15537019968032836, "rewards/accuracy_reward": 0.6852430582046509, "rewards/brier_reward": 0.7796695828437805, "rewards/confidence_uniqueness_reward": 0.9134960055351258, "rewards/format_reward": 0.9844618082046509, "rewards/frontier_aurc_reward": -0.002246162900701165, "rewards/frontier_coverage_1": 0.0013872329844161868, "rewards/frontier_coverage_10": 0.0013872329844161868, "rewards/frontier_coverage_15": 0.0013872329844161868, "rewards/frontier_coverage_20": 0.0013872329844161868, "rewards/frontier_coverage_25": 0.0013872329844161868, "rewards/frontier_coverage_5": 0.0013872329844161868, "rewards/frontier_ece_reward": 0.03560146205127239, "signal/accuracy_reward/centered_abs_mean": 0.17493489682674407, "signal/accuracy_reward/group_std_mean": 0.2285703092813492, "signal/accuracy_reward/group_zero_std_frac": 0.36111111640930177, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08746744841337203, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08746744841337203, "signal/advantage_abs_mean": 0.11331436336040497, "signal/advantage_pre_scale_abs_mean": 0.11331436336040497, "signal/advantage_pre_scale_std": 0.19401153922080994, "signal/advantage_std": 0.19401153922080994, "signal/brier_reward/centered_abs_mean": 0.1502958595752716, "signal/brier_reward/group_std_mean": 0.19655809700489044, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01878698244690895, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01878698244690895, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05595709830522537, "signal/confidence_uniqueness_reward/group_std_mean": 0.08529313653707504, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006994637288153171, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006994637288153171, "signal/format_reward/centered_abs_mean": 0.02707790769636631, "signal/format_reward/group_std_mean": 0.05181853100657463, "signal/format_reward/group_zero_std_frac": 0.7861111402511597, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013538953848183155, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013538953848183155, "signal/frontier_aurc_reward/centered_abs_mean": 0.002420555287972093, "signal/frontier_aurc_reward/group_std_mean": 0.003638601349666715, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.332793687353842e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.332793687353842e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.08835429251194, "signal/frontier_coverage_1/group_std_mean": 0.12635914981365204, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015815417747944594, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015815417747944594, "signal/frontier_coverage_10/centered_abs_mean": 0.08835429251194, "signal/frontier_coverage_10/group_std_mean": 0.12635914981365204, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015815417747944594, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015815417747944594, "signal/frontier_coverage_15/centered_abs_mean": 0.08835429251194, "signal/frontier_coverage_15/group_std_mean": 0.12635914981365204, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015815417747944594, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015815417747944594, "signal/frontier_coverage_20/centered_abs_mean": 0.08835429251194, "signal/frontier_coverage_20/group_std_mean": 0.12635914981365204, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015815417747944594, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015815417747944594, "signal/frontier_coverage_25/centered_abs_mean": 0.08835429251194, "signal/frontier_coverage_25/group_std_mean": 0.12635914981365204, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015815417747944594, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015815417747944594, "signal/frontier_coverage_5/centered_abs_mean": 0.08835429251194, "signal/frontier_coverage_5/group_std_mean": 0.12635914981365204, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015815417747944594, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015815417747944594, "signal/frontier_ece_reward/centered_abs_mean": 0.042579871416091916, "signal/frontier_ece_reward/group_std_mean": 0.05289793238043785, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0053224839270114895, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0053224839270114895, "step": 75 }, { "calibration/aurc": 0.2283627656944609, "calibration/batch_distribution_entropy": 0.8158849488071735, "calibration/buffer_distribution_entropy": 0.7136887505869486, "calibration/confidence_entropy": 0.4471504385566205, "calibration/coverage@0%": 0.020931500872600352, "calibration/coverage@1%": 0.020931500872600352, "calibration/coverage@10%": 0.27006980802792324, "calibration/coverage@15%": 0.3453125, "calibration/coverage@20%": 0.3697916666666667, "calibration/coverage@25%": 0.6741954607046071, "calibration/coverage@30%": 0.7745257452574525, "calibration/coverage@5%": 0.09882744328097731, "calibration/ece": 0.14655096164643444, "calibration/mean_confidence": 0.7371357132363405, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017100694444444443, "completions/max_length": 3731.6, "completions/max_terminated_length": 3731.6, "completions/mean_length": 658.69453125, "completions/mean_terminated_length": 670.2767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.19199760002999963, "grad_norm": 0.0004507862322498113, "learning_rate": 3.855421686746989e-06, "loss": -0.0163, "num_tokens": 155849402.0, "reward": 1.0312724709510803, "reward_std": 0.1551019161939621, "rewards/accuracy_reward": 0.6509548664093018, "rewards/brier_reward": 0.7650970101356507, "rewards/confidence_uniqueness_reward": 0.9270597219467163, "rewards/format_reward": 0.982812511920929, "rewards/frontier_aurc_reward": -0.0021514812484383582, "rewards/frontier_coverage_1": 0.0006764297373592854, "rewards/frontier_coverage_10": 0.0006764297373592854, "rewards/frontier_coverage_15": 0.0006764297373592854, "rewards/frontier_coverage_20": 0.0006764297373592854, "rewards/frontier_coverage_25": 0.0006764297373592854, "rewards/frontier_coverage_5": 0.0006764297373592854, "rewards/frontier_ece_reward": 0.022680159099400042, "signal/accuracy_reward/centered_abs_mean": 0.18104926347732545, "signal/accuracy_reward/group_std_mean": 0.23661141991615295, "signal/accuracy_reward/group_zero_std_frac": 0.33611112236976626, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09052463173866272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09052463173866272, "signal/advantage_abs_mean": 0.11439994722604752, "signal/advantage_pre_scale_abs_mean": 0.11439994722604752, "signal/advantage_pre_scale_std": 0.1880962520837784, "signal/advantage_std": 0.1880962520837784, "signal/brier_reward/centered_abs_mean": 0.15392581820487977, "signal/brier_reward/group_std_mean": 0.19755606949329377, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01924072727560997, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01924072727560997, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.045452053844928744, "signal/confidence_uniqueness_reward/group_std_mean": 0.070206418633461, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005681506730616093, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005681506730616093, "signal/format_reward/centered_abs_mean": 0.027105035632848738, "signal/format_reward/group_std_mean": 0.04782758429646492, "signal/format_reward/group_zero_std_frac": 0.8166666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013552517816424369, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013552517816424369, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020432798657566307, "signal/frontier_aurc_reward/group_std_mean": 0.003136290283873677, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.657470879261382e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.657470879261382e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12025301158428192, "signal/frontier_coverage_1/group_std_mean": 0.16328471302986144, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021525288466364145, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021525288466364145, "signal/frontier_coverage_10/centered_abs_mean": 0.12025301158428192, "signal/frontier_coverage_10/group_std_mean": 0.16328471302986144, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021525288466364145, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021525288466364145, "signal/frontier_coverage_15/centered_abs_mean": 0.12025301158428192, "signal/frontier_coverage_15/group_std_mean": 0.16328471302986144, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021525288466364145, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021525288466364145, "signal/frontier_coverage_20/centered_abs_mean": 0.12025301158428192, "signal/frontier_coverage_20/group_std_mean": 0.16328471302986144, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021525288466364145, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021525288466364145, "signal/frontier_coverage_25/centered_abs_mean": 0.12025301158428192, "signal/frontier_coverage_25/group_std_mean": 0.16328471302986144, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021525288466364145, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021525288466364145, "signal/frontier_coverage_5/centered_abs_mean": 0.12025301158428192, "signal/frontier_coverage_5/group_std_mean": 0.16328471302986144, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021525288466364145, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021525288466364145, "signal/frontier_ece_reward/centered_abs_mean": 0.034540054574608806, "signal/frontier_ece_reward/group_std_mean": 0.044214902073144914, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004317506821826101, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004317506821826101, "step": 80 }, { "calibration/aurc": 0.19863731386705097, "calibration/batch_distribution_entropy": 0.8784851844174906, "calibration/buffer_distribution_entropy": 0.7286079857168308, "calibration/confidence_entropy": 0.47194029223098666, "calibration/coverage@0%": 0.006957876563311484, "calibration/coverage@1%": 0.006957876563311484, "calibration/coverage@10%": 0.21343171212142478, "calibration/coverage@15%": 0.31718648568982216, "calibration/coverage@20%": 0.5108775185596135, "calibration/coverage@25%": 0.8152747734527581, "calibration/coverage@30%": 0.9188776441194223, "calibration/coverage@5%": 0.06903902044177439, "calibration/ece": 0.11392621986175389, "calibration/mean_confidence": 0.685149595819355, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017447916666666674, "completions/max_length": 3531.0, "completions/max_terminated_length": 3531.0, "completions/mean_length": 672.5692749023438, "completions/mean_terminated_length": 684.5512817382812, "completions/min_length": 0.0, "completions/min_terminated_length": 181.8, "epoch": 0.2039974500318746, "grad_norm": 0.00040943012572824955, "learning_rate": 3.7048192771084342e-06, "loss": -0.0158, "num_tokens": 166684600.0, "reward": 1.0490335464477538, "reward_std": 0.15231254696846008, "rewards/accuracy_reward": 0.6825520753860473, "rewards/brier_reward": 0.7915989398956299, "rewards/confidence_uniqueness_reward": 0.9125476956367493, "rewards/format_reward": 0.9823784708976746, "rewards/frontier_aurc_reward": -0.0017510119127109648, "rewards/frontier_coverage_1": 0.0037823686841875316, "rewards/frontier_coverage_10": 0.0037823686841875316, "rewards/frontier_coverage_15": 0.0037823686841875316, "rewards/frontier_coverage_20": 0.0037823686841875316, "rewards/frontier_coverage_25": 0.0037823686841875316, "rewards/frontier_coverage_5": 0.0037823686841875316, "rewards/frontier_ece_reward": 0.025400371849536897, "signal/accuracy_reward/centered_abs_mean": 0.1764702707529068, "signal/accuracy_reward/group_std_mean": 0.22980018258094786, "signal/accuracy_reward/group_zero_std_frac": 0.35555556416511536, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0882351353764534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0882351353764534, "signal/advantage_abs_mean": 0.10985312461853028, "signal/advantage_pre_scale_abs_mean": 0.10985312461853028, "signal/advantage_pre_scale_std": 0.19002984464168549, "signal/advantage_std": 0.19002984464168549, "signal/brier_reward/centered_abs_mean": 0.14407358169555665, "signal/brier_reward/group_std_mean": 0.18776251673698424, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01800919771194458, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01800919771194458, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.051011984795331956, "signal/confidence_uniqueness_reward/group_std_mean": 0.08202408254146576, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0063764980994164945, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0063764980994164945, "signal/format_reward/centered_abs_mean": 0.03092990405857563, "signal/format_reward/group_std_mean": 0.05930749401450157, "signal/format_reward/group_zero_std_frac": 0.7555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.015464952029287816, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.015464952029287816, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017428799066692591, "signal/frontier_aurc_reward/group_std_mean": 0.002763870591297746, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.119755092484411e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.119755092484411e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.11682608127593994, "signal/frontier_coverage_1/group_std_mean": 0.1566822350025177, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020911867963150144, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020911867963150144, "signal/frontier_coverage_10/centered_abs_mean": 0.11682608127593994, "signal/frontier_coverage_10/group_std_mean": 0.1566822350025177, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020911867963150144, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020911867963150144, "signal/frontier_coverage_15/centered_abs_mean": 0.11682608127593994, "signal/frontier_coverage_15/group_std_mean": 0.1566822350025177, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020911867963150144, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020911867963150144, "signal/frontier_coverage_20/centered_abs_mean": 0.11682608127593994, "signal/frontier_coverage_20/group_std_mean": 0.1566822350025177, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020911867963150144, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020911867963150144, "signal/frontier_coverage_25/centered_abs_mean": 0.11682608127593994, "signal/frontier_coverage_25/group_std_mean": 0.1566822350025177, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020911867963150144, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020911867963150144, "signal/frontier_coverage_5/centered_abs_mean": 0.11682608127593994, "signal/frontier_coverage_5/group_std_mean": 0.1566822350025177, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020911867963150144, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020911867963150144, "signal/frontier_ece_reward/centered_abs_mean": 0.03192468658089638, "signal/frontier_ece_reward/group_std_mean": 0.04087934568524361, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0039905858226120475, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0039905858226120475, "step": 85 }, { "calibration/aurc": 0.1621043276555874, "calibration/batch_distribution_entropy": 0.7713018291348717, "calibration/buffer_distribution_entropy": 0.7391826748921617, "calibration/confidence_entropy": 0.3906538953529201, "calibration/coverage@0%": 0.01748775430290532, "calibration/coverage@1%": 0.01748775430290532, "calibration/coverage@10%": 0.29865454137224134, "calibration/coverage@15%": 0.4961013783380685, "calibration/coverage@20%": 0.7170695539701011, "calibration/coverage@25%": 0.8760416319073021, "calibration/coverage@30%": 0.9350359579568505, "calibration/coverage@5%": 0.12930449114246848, "calibration/ece": 0.10879461725676424, "calibration/mean_confidence": 0.7703561898492359, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018055555555555537, "completions/max_length": 3415.8, "completions/max_terminated_length": 3415.8, "completions/mean_length": 657.1771728515625, "completions/mean_terminated_length": 669.2972290039063, "completions/min_length": 0.0, "completions/min_terminated_length": 158.0, "epoch": 0.2159973000337496, "grad_norm": 0.00045981750008650124, "learning_rate": 3.5542168674698798e-06, "loss": -0.0165, "num_tokens": 177323953.0, "reward": 1.048110580444336, "reward_std": 0.1468990057706833, "rewards/accuracy_reward": 0.6861111164093018, "rewards/brier_reward": 0.786482310295105, "rewards/confidence_uniqueness_reward": 0.8900201439857482, "rewards/format_reward": 0.9819444417953491, "rewards/frontier_aurc_reward": -0.002023177081719041, "rewards/frontier_coverage_1": 0.006752363312989474, "rewards/frontier_coverage_10": 0.006752363312989474, "rewards/frontier_coverage_15": 0.006752363312989474, "rewards/frontier_coverage_20": 0.006752363312989474, "rewards/frontier_coverage_25": 0.006752363312989474, "rewards/frontier_coverage_5": 0.006752363312989474, "rewards/frontier_ece_reward": 0.03064808137714863, "signal/accuracy_reward/centered_abs_mean": 0.16185981035232544, "signal/accuracy_reward/group_std_mean": 0.21465785503387452, "signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08092990517616272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08092990517616272, "signal/advantage_abs_mean": 0.10632715672254563, "signal/advantage_pre_scale_abs_mean": 0.10632715672254563, "signal/advantage_pre_scale_std": 0.18754963874816893, "signal/advantage_std": 0.18754963874816893, "signal/brier_reward/centered_abs_mean": 0.14517129957675934, "signal/brier_reward/group_std_mean": 0.1876837819814682, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018146412447094917, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018146412447094917, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0636248804628849, "signal/confidence_uniqueness_reward/group_std_mean": 0.0918489396572113, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007953110057860613, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007953110057860613, "signal/format_reward/centered_abs_mean": 0.03050130233168602, "signal/format_reward/group_std_mean": 0.054274033010005954, "signal/format_reward/group_zero_std_frac": 0.7888888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01525065116584301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01525065116584301, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024305079132318497, "signal/frontier_aurc_reward/group_std_mean": 0.003738354705274105, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.3506090150913224e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.3506090150913224e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.09725135415792466, "signal/frontier_coverage_1/group_std_mean": 0.1383568376302719, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017407992389053106, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017407992389053106, "signal/frontier_coverage_10/centered_abs_mean": 0.09725135415792466, "signal/frontier_coverage_10/group_std_mean": 0.1383568376302719, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017407992389053106, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017407992389053106, "signal/frontier_coverage_15/centered_abs_mean": 0.09725135415792466, "signal/frontier_coverage_15/group_std_mean": 0.1383568376302719, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017407992389053106, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017407992389053106, "signal/frontier_coverage_20/centered_abs_mean": 0.09725135415792466, "signal/frontier_coverage_20/group_std_mean": 0.1383568376302719, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017407992389053106, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017407992389053106, "signal/frontier_coverage_25/centered_abs_mean": 0.09725135415792466, "signal/frontier_coverage_25/group_std_mean": 0.1383568376302719, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017407992389053106, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017407992389053106, "signal/frontier_coverage_5/centered_abs_mean": 0.09725135415792466, "signal/frontier_coverage_5/group_std_mean": 0.1383568376302719, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017407992389053106, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017407992389053106, "signal/frontier_ece_reward/centered_abs_mean": 0.03363135680556297, "signal/frontier_ece_reward/group_std_mean": 0.04208812639117241, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0042039196006953715, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0042039196006953715, "step": 90 }, { "calibration/aurc": 0.22302830877145974, "calibration/batch_distribution_entropy": 0.7654984420303647, "calibration/buffer_distribution_entropy": 0.7426643403332136, "calibration/confidence_entropy": 0.37485479191168325, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.32827225130890053, "calibration/coverage@15%": 0.44633415268512344, "calibration/coverage@20%": 0.5211039690113962, "calibration/coverage@25%": 0.5988238926626621, "calibration/coverage@30%": 0.70975130522652, "calibration/coverage@5%": 0.10628272251308901, "calibration/ece": 0.17951479772284673, "calibration/mean_confidence": 0.7752378849995252, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010850694444444465, "completions/max_length": 3244.6, "completions/max_terminated_length": 3244.6, "completions/mean_length": 665.167529296875, "completions/mean_terminated_length": 672.4716430664063, "completions/min_length": 0.0, "completions/min_terminated_length": 154.6, "epoch": 0.22799715003562457, "grad_norm": 0.00041399727342650294, "learning_rate": 3.4036144578313257e-06, "loss": -0.0103, "num_tokens": 188078363.0, "reward": 1.0430236101150512, "reward_std": 0.1384707883000374, "rewards/accuracy_reward": 0.6696180582046509, "rewards/brier_reward": 0.7802430033683777, "rewards/confidence_uniqueness_reward": 0.885105288028717, "rewards/format_reward": 0.989062488079071, "rewards/frontier_aurc_reward": -0.002526196092367172, "rewards/frontier_coverage_1": 0.017078271601349115, "rewards/frontier_coverage_10": 0.017078271601349115, "rewards/frontier_coverage_15": 0.017078271601349115, "rewards/frontier_coverage_20": 0.017078271601349115, "rewards/frontier_coverage_25": 0.017078271601349115, "rewards/frontier_coverage_5": 0.017078271601349115, "rewards/frontier_ece_reward": 0.029806675761938094, "signal/accuracy_reward/centered_abs_mean": 0.15270182192325593, "signal/accuracy_reward/group_std_mean": 0.2024629831314087, "signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07635091096162797, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07635091096162797, "signal/advantage_abs_mean": 0.09986221194267272, "signal/advantage_pre_scale_abs_mean": 0.09986221194267272, "signal/advantage_pre_scale_std": 0.17722425758838653, "signal/advantage_std": 0.17722425758838653, "signal/brier_reward/centered_abs_mean": 0.14648381173610686, "signal/brier_reward/group_std_mean": 0.19169094264507294, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018310476467013358, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018310476467013358, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06063394397497177, "signal/confidence_uniqueness_reward/group_std_mean": 0.08702098578214645, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007579242996871472, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007579242996871472, "signal/format_reward/centered_abs_mean": 0.019715712033212186, "signal/format_reward/group_std_mean": 0.039822696894407275, "signal/format_reward/group_zero_std_frac": 0.8305555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009857856016606093, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009857856016606093, "signal/frontier_aurc_reward/centered_abs_mean": 0.003348661307245493, "signal/frontier_aurc_reward/group_std_mean": 0.005126806069165468, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.994103412376717e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.994103412376717e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.08807848840951919, "signal/frontier_coverage_1/group_std_mean": 0.126033778488636, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00157660492695868, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00157660492695868, "signal/frontier_coverage_10/centered_abs_mean": 0.08807848840951919, "signal/frontier_coverage_10/group_std_mean": 0.126033778488636, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00157660492695868, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00157660492695868, "signal/frontier_coverage_15/centered_abs_mean": 0.08807848840951919, "signal/frontier_coverage_15/group_std_mean": 0.126033778488636, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00157660492695868, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00157660492695868, "signal/frontier_coverage_20/centered_abs_mean": 0.08807848840951919, "signal/frontier_coverage_20/group_std_mean": 0.126033778488636, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00157660492695868, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00157660492695868, "signal/frontier_coverage_25/centered_abs_mean": 0.08807848840951919, "signal/frontier_coverage_25/group_std_mean": 0.126033778488636, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00157660492695868, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00157660492695868, "signal/frontier_coverage_5/centered_abs_mean": 0.08807848840951919, "signal/frontier_coverage_5/group_std_mean": 0.126033778488636, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00157660492695868, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00157660492695868, "signal/frontier_ece_reward/centered_abs_mean": 0.03316693603992462, "signal/frontier_ece_reward/group_std_mean": 0.04110720306634903, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0041458670049905775, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0041458670049905775, "step": 95 }, { "calibration/aurc": 0.19005778525037625, "calibration/batch_distribution_entropy": 0.7905120036990934, "calibration/buffer_distribution_entropy": 0.7454486867596737, "calibration/confidence_entropy": 0.41916534905811276, "calibration/coverage@0%": 0.01983628239499553, "calibration/coverage@1%": 0.01983628239499553, "calibration/coverage@10%": 0.12477490240346174, "calibration/coverage@15%": 0.3196656977564555, "calibration/coverage@20%": 0.7049851243582956, "calibration/coverage@25%": 0.9022022628372499, "calibration/coverage@30%": 0.9375679721496954, "calibration/coverage@5%": 0.049326899016979446, "calibration/ece": 0.09743477958458721, "calibration/mean_confidence": 0.7563954787317189, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015364583333333303, "completions/max_length": 3783.6, "completions/max_terminated_length": 3783.6, "completions/mean_length": 696.7359375, "completions/mean_terminated_length": 707.5933471679688, "completions/min_length": 0.0, "completions/min_terminated_length": 184.8, "epoch": 0.23999700003749952, "grad_norm": 0.00038863401277922094, "learning_rate": 3.2530120481927713e-06, "loss": -0.0138, "num_tokens": 199203833.0, "reward": 1.0556352138519287, "reward_std": 0.1429404079914093, "rewards/accuracy_reward": 0.6901041626930237, "rewards/brier_reward": 0.8032171607017518, "rewards/confidence_uniqueness_reward": 0.8976126194000245, "rewards/format_reward": 0.9845486164093018, "rewards/frontier_aurc_reward": -0.0018692356767132877, "rewards/frontier_coverage_1": 0.02305122137913713, "rewards/frontier_coverage_10": 0.02305122137913713, "rewards/frontier_coverage_15": 0.02305122137913713, "rewards/frontier_coverage_20": 0.02305122137913713, "rewards/frontier_coverage_25": 0.02305122137913713, "rewards/frontier_coverage_5": 0.02305122137913713, "rewards/frontier_ece_reward": 0.02610306181013584, "signal/accuracy_reward/centered_abs_mean": 0.16201171576976775, "signal/accuracy_reward/group_std_mean": 0.21425627470016478, "signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08100585788488388, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08100585788488388, "signal/advantage_abs_mean": 0.10346845239400863, "signal/advantage_pre_scale_abs_mean": 0.10346845239400863, "signal/advantage_pre_scale_std": 0.18151322603225709, "signal/advantage_std": 0.18151322603225709, "signal/brier_reward/centered_abs_mean": 0.13857089430093766, "signal/brier_reward/group_std_mean": 0.18216053247451783, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017321361787617208, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017321361787617208, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.050343307107686995, "signal/confidence_uniqueness_reward/group_std_mean": 0.07695924490690231, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006292913388460874, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006292913388460874, "signal/format_reward/centered_abs_mean": 0.02498914934694767, "signal/format_reward/group_std_mean": 0.04789231047034263, "signal/format_reward/group_zero_std_frac": 0.8, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012494574673473835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012494574673473835, "signal/frontier_aurc_reward/centered_abs_mean": 0.00248065204359591, "signal/frontier_aurc_reward/group_std_mean": 0.0040627093985676765, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.440367265488021e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.440367265488021e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.09468016475439071, "signal/frontier_coverage_1/group_std_mean": 0.13442795127630233, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016947749769315123, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016947749769315123, "signal/frontier_coverage_10/centered_abs_mean": 0.09468016475439071, "signal/frontier_coverage_10/group_std_mean": 0.13442795127630233, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016947749769315123, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016947749769315123, "signal/frontier_coverage_15/centered_abs_mean": 0.09468016475439071, "signal/frontier_coverage_15/group_std_mean": 0.13442795127630233, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016947749769315123, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016947749769315123, "signal/frontier_coverage_20/centered_abs_mean": 0.09468016475439071, "signal/frontier_coverage_20/group_std_mean": 0.13442795127630233, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016947749769315123, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016947749769315123, "signal/frontier_coverage_25/centered_abs_mean": 0.09468016475439071, "signal/frontier_coverage_25/group_std_mean": 0.13442795127630233, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016947749769315123, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016947749769315123, "signal/frontier_coverage_5/centered_abs_mean": 0.09468016475439071, "signal/frontier_coverage_5/group_std_mean": 0.13442795127630233, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016947749769315123, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016947749769315123, "signal/frontier_ece_reward/centered_abs_mean": 0.028183171153068544, "signal/frontier_ece_reward/group_std_mean": 0.035510845482349396, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003522896394133568, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003522896394133568, "step": 100 }, { "epoch": 0.23999700003749952, "eval_calibration/aurc": 0.15891660441180894, "eval_calibration/batch_distribution_entropy": 0.6959708379422341, "eval_calibration/buffer_distribution_entropy": 0.7499945272337074, "eval_calibration/confidence_entropy": 0.414659227708472, "eval_calibration/coverage@0%": 0.19452284946236562, "eval_calibration/coverage@1%": 0.19452284946236562, "eval_calibration/coverage@10%": 0.3979166666666667, "eval_calibration/coverage@15%": 0.6486335125448028, "eval_calibration/coverage@20%": 0.7757056451612904, "eval_calibration/coverage@25%": 0.8178763440860215, "eval_calibration/coverage@30%": 0.9625336021505376, "eval_calibration/coverage@5%": 0.24828629032258065, "eval_calibration/ece": 0.1466347668793299, "eval_calibration/mean_confidence": 0.7744951179912419, "eval_completions/clipped_ratio": 0.012152777777777771, "eval_completions/max_length": 2663.8333333333335, "eval_completions/max_terminated_length": 2663.8333333333335, "eval_completions/mean_length": 688.960947672526, "eval_completions/mean_terminated_length": 697.5240783691406, "eval_completions/min_length": 46.333333333333336, "eval_completions/min_terminated_length": 219.66666666666666, "eval_loss": 0.0, "eval_num_tokens": 199203833.0, "eval_reward": 1.0394453605016072, "eval_reward_std": 0.2735634073615074, "eval_rewards/accuracy_reward": 0.6710069477558136, "eval_rewards/brier_reward": 0.7972110112508138, "eval_rewards/confidence_uniqueness_reward": 0.8433377345403036, "eval_rewards/format_reward": 0.9869791666666666, "eval_rewards/frontier_aurc_reward": -0.002003069695395728, "eval_rewards/frontier_coverage_1": 0.026348761282861233, "eval_rewards/frontier_coverage_10": 0.026348761282861233, "eval_rewards/frontier_coverage_15": 0.026348761282861233, "eval_rewards/frontier_coverage_20": 0.026348761282861233, "eval_rewards/frontier_coverage_25": 0.026348761282861233, "eval_rewards/frontier_coverage_5": 0.026348761282861233, "eval_rewards/frontier_ece_reward": 0.0207175404454271, "eval_runtime": 205.6415, "eval_samples_per_second": 4.863, "eval_signal/accuracy_reward/centered_abs_mean": 0.4276801198720932, "eval_signal/accuracy_reward/group_std_mean": 0.4686971952517827, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2138400599360466, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2138400599360466, "eval_signal/advantage_abs_mean": 0.237903726597627, "eval_signal/advantage_pre_scale_abs_mean": 0.237903726597627, "eval_signal/advantage_pre_scale_std": 0.2721952473123868, "eval_signal/advantage_std": 0.2721952473123868, "eval_signal/brier_reward/centered_abs_mean": 0.22816414137681326, "eval_signal/brier_reward/group_std_mean": 0.28985429803530377, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028520517672101658, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.028520517672101658, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07482141132156055, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.11087949698170026, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009352676415195068, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009352676415195068, "eval_signal/format_reward/centered_abs_mean": 0.024793836598594982, "eval_signal/format_reward/group_std_mean": 0.06416239465276401, "eval_signal/format_reward/group_zero_std_frac": 0.6666666865348816, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.012396918299297491, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.012396918299297491, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0030194248538464308, "eval_signal/frontier_aurc_reward/group_std_mean": 0.005542080150917172, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4047704907134175e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4047704907134175e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.1204829066991806, "eval_signal/frontier_coverage_1/group_std_mean": 0.20749556769927344, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002156644050652782, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002156644050652782, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.1204829066991806, "eval_signal/frontier_coverage_10/group_std_mean": 0.20749556769927344, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002156644050652782, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002156644050652782, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.1204829066991806, "eval_signal/frontier_coverage_15/group_std_mean": 0.20749556769927344, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002156644050652782, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002156644050652782, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.1204829066991806, "eval_signal/frontier_coverage_20/group_std_mean": 0.20749556769927344, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002156644050652782, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002156644050652782, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.1204829066991806, "eval_signal/frontier_coverage_25/group_std_mean": 0.20749556769927344, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002156644050652782, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002156644050652782, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.1204829066991806, "eval_signal/frontier_coverage_5/group_std_mean": 0.20749556769927344, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002156644050652782, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002156644050652782, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.03396274273594221, "eval_signal/frontier_ece_reward/group_std_mean": 0.04516912375887235, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004245342841992776, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004245342841992776, "eval_steps_per_second": 0.029, "step": 100 }, { "calibration/aurc": 0.3174403591072565, "calibration/batch_distribution_entropy": 0.7832574028492738, "calibration/buffer_distribution_entropy": 0.7522402415093571, "calibration/confidence_entropy": 0.4539565113764869, "calibration/coverage@0%": 0.015250790160780538, "calibration/coverage@1%": 0.015250790160780538, "calibration/coverage@10%": 0.14032004223834288, "calibration/coverage@15%": 0.15768846329097447, "calibration/coverage@20%": 0.25616122896219523, "calibration/coverage@25%": 0.32953986243608197, "calibration/coverage@30%": 0.3985706226539276, "calibration/coverage@5%": 0.08104026384499106, "calibration/ece": 0.17285030321480427, "calibration/mean_confidence": 0.7526070795683661, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015104166666666651, "completions/max_length": 3606.8, "completions/max_terminated_length": 3606.8, "completions/mean_length": 710.5170166015625, "completions/mean_terminated_length": 721.425048828125, "completions/min_length": 0.0, "completions/min_terminated_length": 164.6, "epoch": 0.2519968500393745, "grad_norm": 0.0004076336626894772, "learning_rate": 3.1024096385542172e-06, "loss": -0.0122, "num_tokens": 210465853.0, "reward": 1.0506530284881592, "reward_std": 0.13989392966032027, "rewards/accuracy_reward": 0.68125, "rewards/brier_reward": 0.8004813671112061, "rewards/confidence_uniqueness_reward": 0.9040295124053955, "rewards/format_reward": 0.9848958373069763, "rewards/frontier_aurc_reward": -0.0015720528550446033, "rewards/frontier_coverage_1": 0.018485220894217492, "rewards/frontier_coverage_10": 0.018485220894217492, "rewards/frontier_coverage_15": 0.018485220894217492, "rewards/frontier_coverage_20": 0.018485220894217492, "rewards/frontier_coverage_25": 0.018485220894217492, "rewards/frontier_coverage_5": 0.018485220894217492, "rewards/frontier_ece_reward": 0.0204722385853529, "signal/accuracy_reward/centered_abs_mean": 0.15923394113779069, "signal/accuracy_reward/group_std_mean": 0.21463679075241088, "signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07961697056889534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07961697056889534, "signal/advantage_abs_mean": 0.09950301349163056, "signal/advantage_pre_scale_abs_mean": 0.09950301349163056, "signal/advantage_pre_scale_std": 0.17661311626434326, "signal/advantage_std": 0.17661311626434326, "signal/brier_reward/centered_abs_mean": 0.13340435177087784, "signal/brier_reward/group_std_mean": 0.17492244243621827, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01667554397135973, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01667554397135973, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.047524832934141156, "signal/confidence_uniqueness_reward/group_std_mean": 0.07183988243341446, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0059406041167676445, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0059406041167676445, "signal/format_reward/centered_abs_mean": 0.024891493655741215, "signal/format_reward/group_std_mean": 0.04560527727007866, "signal/format_reward/group_zero_std_frac": 0.8166666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012445746827870608, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012445746827870608, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016542579047381877, "signal/frontier_aurc_reward/group_std_mean": 0.002758215693756938, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9611215722979978e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9611215722979978e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.09999113231897354, "signal/frontier_coverage_1/group_std_mean": 0.13899571299552918, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017898412188515067, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017898412188515067, "signal/frontier_coverage_10/centered_abs_mean": 0.09999113231897354, "signal/frontier_coverage_10/group_std_mean": 0.13899571299552918, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017898412188515067, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017898412188515067, "signal/frontier_coverage_15/centered_abs_mean": 0.09999113231897354, "signal/frontier_coverage_15/group_std_mean": 0.13899571299552918, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017898412188515067, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017898412188515067, "signal/frontier_coverage_20/centered_abs_mean": 0.09999113231897354, "signal/frontier_coverage_20/group_std_mean": 0.13899571299552918, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017898412188515067, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017898412188515067, "signal/frontier_coverage_25/centered_abs_mean": 0.09999113231897354, "signal/frontier_coverage_25/group_std_mean": 0.13899571299552918, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017898412188515067, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017898412188515067, "signal/frontier_coverage_5/centered_abs_mean": 0.09999113231897354, "signal/frontier_coverage_5/group_std_mean": 0.13899571299552918, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017898412188515067, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017898412188515067, "signal/frontier_ece_reward/centered_abs_mean": 0.02349744737148285, "signal/frontier_ece_reward/group_std_mean": 0.030407802015542985, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002937180921435356, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002937180921435356, "step": 105 }, { "calibration/aurc": 0.1980429971425905, "calibration/batch_distribution_entropy": 0.8188233169740314, "calibration/buffer_distribution_entropy": 0.7578019516298132, "calibration/confidence_entropy": 0.4403369866103337, "calibration/coverage@0%": 0.04398981835652027, "calibration/coverage@1%": 0.04398981835652027, "calibration/coverage@10%": 0.2766707394091519, "calibration/coverage@15%": 0.3595005740521652, "calibration/coverage@20%": 0.4683630213987621, "calibration/coverage@25%": 0.6562042932608049, "calibration/coverage@30%": 0.8698795999435456, "calibration/coverage@5%": 0.15361152888283608, "calibration/ece": 0.11835691165409723, "calibration/mean_confidence": 0.7310259669183279, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013541666666666652, "completions/max_length": 3426.8, "completions/max_terminated_length": 3426.8, "completions/mean_length": 731.7519897460937, "completions/mean_terminated_length": 741.8162719726563, "completions/min_length": 0.0, "completions/min_terminated_length": 207.4, "epoch": 0.2639967000412495, "grad_norm": 0.0004139347583986819, "learning_rate": 2.9518072289156627e-06, "loss": -0.0117, "num_tokens": 222004084.0, "reward": 1.0684332370758056, "reward_std": 0.1274869754910469, "rewards/accuracy_reward": 0.7131944537162781, "rewards/brier_reward": 0.813305401802063, "rewards/confidence_uniqueness_reward": 0.9080566763877869, "rewards/format_reward": 0.9864583373069763, "rewards/frontier_aurc_reward": -0.001346051273867488, "rewards/frontier_coverage_1": 0.008798408973962068, "rewards/frontier_coverage_10": 0.008798408973962068, "rewards/frontier_coverage_15": 0.008798408973962068, "rewards/frontier_coverage_20": 0.008798408973962068, "rewards/frontier_coverage_25": 0.008798408973962068, "rewards/frontier_coverage_5": 0.008798408973962068, "rewards/frontier_ece_reward": 0.02012586295604706, "signal/accuracy_reward/centered_abs_mean": 0.14842664897441865, "signal/accuracy_reward/group_std_mean": 0.19679024815559387, "signal/accuracy_reward/group_zero_std_frac": 0.43888888955116273, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07421332448720933, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07421332448720933, "signal/advantage_abs_mean": 0.09238868355751037, "signal/advantage_pre_scale_abs_mean": 0.09238868355751037, "signal/advantage_pre_scale_std": 0.1690053313970566, "signal/advantage_std": 0.1690053313970566, "signal/brier_reward/centered_abs_mean": 0.12360798418521882, "signal/brier_reward/group_std_mean": 0.16248373985290526, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015450998023152352, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015450998023152352, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04519175067543983, "signal/confidence_uniqueness_reward/group_std_mean": 0.06818027943372726, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005648968834429979, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005648968834429979, "signal/format_reward/centered_abs_mean": 0.022352430410683154, "signal/format_reward/group_std_mean": 0.04120796211063862, "signal/format_reward/group_zero_std_frac": 0.8333333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011176215205341577, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011176215205341577, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013575590681284666, "signal/frontier_aurc_reward/group_std_mean": 0.0022294150665402414, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4300306176883168e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4300306176883168e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.10466690212488175, "signal/frontier_coverage_1/group_std_mean": 0.1473758965730667, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018735374789685011, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018735374789685011, "signal/frontier_coverage_10/centered_abs_mean": 0.10466690212488175, "signal/frontier_coverage_10/group_std_mean": 0.1473758965730667, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018735374789685011, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018735374789685011, "signal/frontier_coverage_15/centered_abs_mean": 0.10466690212488175, "signal/frontier_coverage_15/group_std_mean": 0.1473758965730667, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018735374789685011, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018735374789685011, "signal/frontier_coverage_20/centered_abs_mean": 0.10466690212488175, "signal/frontier_coverage_20/group_std_mean": 0.1473758965730667, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018735374789685011, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018735374789685011, "signal/frontier_coverage_25/centered_abs_mean": 0.10466690212488175, "signal/frontier_coverage_25/group_std_mean": 0.1473758965730667, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018735374789685011, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018735374789685011, "signal/frontier_coverage_5/centered_abs_mean": 0.10466690212488175, "signal/frontier_coverage_5/group_std_mean": 0.1473758965730667, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018735374789685011, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018735374789685011, "signal/frontier_ece_reward/centered_abs_mean": 0.022222984954714774, "signal/frontier_ece_reward/group_std_mean": 0.028566232323646544, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0027778731193393467, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0027778731193393467, "step": 110 }, { "calibration/aurc": 0.2674647745371999, "calibration/batch_distribution_entropy": 0.8655340385503951, "calibration/buffer_distribution_entropy": 0.7650680762501356, "calibration/confidence_entropy": 0.46264810399743606, "calibration/coverage@0%": 0.0356302909735624, "calibration/coverage@1%": 0.0356302909735624, "calibration/coverage@10%": 0.11016870839079522, "calibration/coverage@15%": 0.22985091263385246, "calibration/coverage@20%": 0.4752261244438419, "calibration/coverage@25%": 0.5995528351424358, "calibration/coverage@30%": 0.66065353496751, "calibration/coverage@5%": 0.059629899612001526, "calibration/ece": 0.16247371201899063, "calibration/mean_confidence": 0.6835954949209723, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01284722222222221, "completions/max_length": 3594.4, "completions/max_terminated_length": 3594.4, "completions/mean_length": 743.1428955078125, "completions/mean_terminated_length": 752.9280395507812, "completions/min_length": 0.0, "completions/min_terminated_length": 198.4, "epoch": 0.27599655004312446, "grad_norm": 0.0003546822990756482, "learning_rate": 2.8012048192771087e-06, "loss": -0.0119, "num_tokens": 233644290.0, "reward": 1.045598602294922, "reward_std": 0.1324237823486328, "rewards/accuracy_reward": 0.665538203716278, "rewards/brier_reward": 0.7980833888053894, "rewards/confidence_uniqueness_reward": 0.9122416257858277, "rewards/format_reward": 0.9870659828186035, "rewards/frontier_aurc_reward": -0.0016012408072128893, "rewards/frontier_coverage_1": 0.03013449099380523, "rewards/frontier_coverage_10": 0.03013449099380523, "rewards/frontier_coverage_15": 0.03013449099380523, "rewards/frontier_coverage_20": 0.03013449099380523, "rewards/frontier_coverage_25": 0.03013449099380523, "rewards/frontier_coverage_5": 0.03013449099380523, "rewards/frontier_ece_reward": 0.018384577706456184, "signal/accuracy_reward/centered_abs_mean": 0.16057400405406952, "signal/accuracy_reward/group_std_mean": 0.20613610446453096, "signal/accuracy_reward/group_zero_std_frac": 0.43055555820465086, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08028700202703476, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08028700202703476, "signal/advantage_abs_mean": 0.09785276204347611, "signal/advantage_pre_scale_abs_mean": 0.09785276204347611, "signal/advantage_pre_scale_std": 0.17164961993694305, "signal/advantage_std": 0.17164961993694305, "signal/brier_reward/centered_abs_mean": 0.13958249241113663, "signal/brier_reward/group_std_mean": 0.17782102823257445, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01744781155139208, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01744781155139208, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0446560338139534, "signal/confidence_uniqueness_reward/group_std_mean": 0.06761002168059349, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005582004226744175, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005582004226744175, "signal/format_reward/centered_abs_mean": 0.021869575139135122, "signal/format_reward/group_std_mean": 0.04040019139647484, "signal/format_reward/group_zero_std_frac": 0.8388888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010934787569567561, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010934787569567561, "signal/frontier_aurc_reward/centered_abs_mean": 0.001572295231744647, "signal/frontier_aurc_reward/group_std_mean": 0.002514668833464384, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8144082898506893e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8144082898506893e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12204509526491165, "signal/frontier_coverage_1/group_std_mean": 0.16676346063613892, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002184607065282762, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002184607065282762, "signal/frontier_coverage_10/centered_abs_mean": 0.12204509526491165, "signal/frontier_coverage_10/group_std_mean": 0.16676346063613892, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002184607065282762, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002184607065282762, "signal/frontier_coverage_15/centered_abs_mean": 0.12204509526491165, "signal/frontier_coverage_15/group_std_mean": 0.16676346063613892, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002184607065282762, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002184607065282762, "signal/frontier_coverage_20/centered_abs_mean": 0.12204509526491165, "signal/frontier_coverage_20/group_std_mean": 0.16676346063613892, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002184607065282762, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002184607065282762, "signal/frontier_coverage_25/centered_abs_mean": 0.12204509526491165, "signal/frontier_coverage_25/group_std_mean": 0.16676346063613892, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002184607065282762, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002184607065282762, "signal/frontier_coverage_5/centered_abs_mean": 0.12204509526491165, "signal/frontier_coverage_5/group_std_mean": 0.16676346063613892, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002184607065282762, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002184607065282762, "signal/frontier_ece_reward/centered_abs_mean": 0.022244375944137574, "signal/frontier_ece_reward/group_std_mean": 0.028506366163492204, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0027805469930171967, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0027805469930171967, "step": 115 }, { "calibration/aurc": 0.2672984725852937, "calibration/batch_distribution_entropy": 0.8792197933707804, "calibration/buffer_distribution_entropy": 0.7740401221561556, "calibration/confidence_entropy": 0.430121035681503, "calibration/coverage@0%": 0.027200066755723284, "calibration/coverage@1%": 0.027200066755723284, "calibration/coverage@10%": 0.2044821067474349, "calibration/coverage@15%": 0.42163062243374394, "calibration/coverage@20%": 0.5291554748877976, "calibration/coverage@25%": 0.5711437336692795, "calibration/coverage@30%": 0.5952792553191489, "calibration/coverage@5%": 0.13044019833467066, "calibration/ece": 0.15999251951129834, "calibration/mean_confidence": 0.6908585197289003, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013281250000000022, "completions/max_length": 3213.6, "completions/max_terminated_length": 3213.6, "completions/mean_length": 733.0212890625, "completions/mean_terminated_length": 742.8743530273438, "completions/min_length": 0.0, "completions/min_terminated_length": 226.0, "epoch": 0.28799640004499943, "grad_norm": 0.00029223994351923466, "learning_rate": 2.6506024096385547e-06, "loss": -0.012, "num_tokens": 245170551.0, "reward": 1.0578627586364746, "reward_std": 0.13152444064617158, "rewards/accuracy_reward": 0.6881944417953492, "rewards/brier_reward": 0.8041746258735657, "rewards/confidence_uniqueness_reward": 0.9153994798660279, "rewards/format_reward": 0.9867187380790711, "rewards/frontier_aurc_reward": -0.0015686721657402814, "rewards/frontier_coverage_1": 0.027476230938918888, "rewards/frontier_coverage_10": 0.027476230938918888, "rewards/frontier_coverage_15": 0.027476230938918888, "rewards/frontier_coverage_20": 0.027476230938918888, "rewards/frontier_coverage_25": 0.027476230938918888, "rewards/frontier_coverage_5": 0.027476230938918888, "rewards/frontier_ece_reward": 0.020292357727885246, "signal/accuracy_reward/centered_abs_mean": 0.1541992172598839, "signal/accuracy_reward/group_std_mean": 0.20872304141521453, "signal/accuracy_reward/group_zero_std_frac": 0.39166666865348815, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07709960862994195, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07709960862994195, "signal/advantage_abs_mean": 0.0929091677069664, "signal/advantage_pre_scale_abs_mean": 0.0929091677069664, "signal/advantage_pre_scale_std": 0.16713809072971345, "signal/advantage_std": 0.16713809072971345, "signal/brier_reward/centered_abs_mean": 0.14138388335704805, "signal/brier_reward/group_std_mean": 0.183754500746727, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017672985419631006, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017672985419631006, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04437449499964714, "signal/confidence_uniqueness_reward/group_std_mean": 0.06984723061323166, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005546811874955893, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005546811874955893, "signal/format_reward/centered_abs_mean": 0.02253146693110466, "signal/format_reward/group_std_mean": 0.044410817325115204, "signal/format_reward/group_zero_std_frac": 0.8111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01126573346555233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01126573346555233, "signal/frontier_aurc_reward/centered_abs_mean": 0.001606982573866844, "signal/frontier_aurc_reward/group_std_mean": 0.0025633119512349367, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.876498801924754e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.876498801924754e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13902547657489778, "signal/frontier_coverage_1/group_std_mean": 0.18895367681980133, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024885560385882856, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024885560385882856, "signal/frontier_coverage_10/centered_abs_mean": 0.13902547657489778, "signal/frontier_coverage_10/group_std_mean": 0.18895367681980133, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024885560385882856, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024885560385882856, "signal/frontier_coverage_15/centered_abs_mean": 0.13902547657489778, "signal/frontier_coverage_15/group_std_mean": 0.18895367681980133, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024885560385882856, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024885560385882856, "signal/frontier_coverage_20/centered_abs_mean": 0.13902547657489778, "signal/frontier_coverage_20/group_std_mean": 0.18895367681980133, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024885560385882856, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024885560385882856, "signal/frontier_coverage_25/centered_abs_mean": 0.13902547657489778, "signal/frontier_coverage_25/group_std_mean": 0.18895367681980133, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024885560385882856, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024885560385882856, "signal/frontier_coverage_5/centered_abs_mean": 0.13902547657489778, "signal/frontier_coverage_5/group_std_mean": 0.18895367681980133, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024885560385882856, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024885560385882856, "signal/frontier_ece_reward/centered_abs_mean": 0.022856369987130164, "signal/frontier_ece_reward/group_std_mean": 0.02880855239927769, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0028570462483912705, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0028570462483912705, "step": 120 }, { "calibration/aurc": 0.1551460457832317, "calibration/batch_distribution_entropy": 0.7586701247838616, "calibration/buffer_distribution_entropy": 0.7802119276107362, "calibration/confidence_entropy": 0.36355490124193934, "calibration/coverage@0%": 0.014926286768201092, "calibration/coverage@1%": 0.014926286768201092, "calibration/coverage@10%": 0.40697873647633714, "calibration/coverage@15%": 0.5172635027475139, "calibration/coverage@20%": 0.7615805455715663, "calibration/coverage@25%": 0.8873517883711664, "calibration/coverage@30%": 0.9523954341064982, "calibration/coverage@5%": 0.1930317832221018, "calibration/ece": 0.10565811367996707, "calibration/mean_confidence": 0.7530231723896744, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014149305555555557, "completions/max_length": 3340.4, "completions/max_terminated_length": 3340.4, "completions/mean_length": 733.7210815429687, "completions/mean_terminated_length": 744.2840087890625, "completions/min_length": 0.0, "completions/min_terminated_length": 227.8, "epoch": 0.2999962500468744, "grad_norm": 0.00039019520045258105, "learning_rate": 2.5e-06, "loss": -0.0142, "num_tokens": 256740682.0, "reward": 1.0580396413803101, "reward_std": 0.13579430282115937, "rewards/accuracy_reward": 0.6907986164093017, "rewards/brier_reward": 0.8093456387519836, "rewards/confidence_uniqueness_reward": 0.8919880509376525, "rewards/format_reward": 0.985850703716278, "rewards/frontier_aurc_reward": -0.0017035908997058869, "rewards/frontier_coverage_1": 0.03645942322909832, "rewards/frontier_coverage_10": 0.03645942322909832, "rewards/frontier_coverage_15": 0.03645942322909832, "rewards/frontier_coverage_20": 0.03645942322909832, "rewards/frontier_coverage_25": 0.03645942322909832, "rewards/frontier_coverage_5": 0.03645942322909832, "rewards/frontier_ece_reward": 0.025304096192121504, "signal/accuracy_reward/centered_abs_mean": 0.15849609375, "signal/accuracy_reward/group_std_mean": 0.2060663789510727, "signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.079248046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.079248046875, "signal/advantage_abs_mean": 0.09873658120632171, "signal/advantage_pre_scale_abs_mean": 0.09873658120632171, "signal/advantage_pre_scale_std": 0.17506541907787324, "signal/advantage_std": 0.17506541907787324, "signal/brier_reward/centered_abs_mean": 0.1506500333547592, "signal/brier_reward/group_std_mean": 0.19419657588005065, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0188312541693449, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0188312541693449, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05950758457183838, "signal/confidence_uniqueness_reward/group_std_mean": 0.08664509057998657, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007438448071479797, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007438448071479797, "signal/format_reward/centered_abs_mean": 0.02527669258415699, "signal/format_reward/group_std_mean": 0.0472879096865654, "signal/format_reward/group_zero_std_frac": 0.8138888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012638346292078495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012638346292078495, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022916203830391168, "signal/frontier_aurc_reward/group_std_mean": 0.0036330488976091145, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.102000239072368e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.102000239072368e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13295071721076965, "signal/frontier_coverage_1/group_std_mean": 0.18857296407222748, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00237981784157455, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00237981784157455, "signal/frontier_coverage_10/centered_abs_mean": 0.13295071721076965, "signal/frontier_coverage_10/group_std_mean": 0.18857296407222748, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00237981784157455, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00237981784157455, "signal/frontier_coverage_15/centered_abs_mean": 0.13295071721076965, "signal/frontier_coverage_15/group_std_mean": 0.18857296407222748, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00237981784157455, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00237981784157455, "signal/frontier_coverage_20/centered_abs_mean": 0.13295071721076965, "signal/frontier_coverage_20/group_std_mean": 0.18857296407222748, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00237981784157455, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00237981784157455, "signal/frontier_coverage_25/centered_abs_mean": 0.13295071721076965, "signal/frontier_coverage_25/group_std_mean": 0.18857296407222748, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00237981784157455, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00237981784157455, "signal/frontier_coverage_5/centered_abs_mean": 0.13295071721076965, "signal/frontier_coverage_5/group_std_mean": 0.18857296407222748, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00237981784157455, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00237981784157455, "signal/frontier_ece_reward/centered_abs_mean": 0.024529390409588812, "signal/frontier_ece_reward/group_std_mean": 0.03039325512945652, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030661738011986015, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030661738011986015, "step": 125 }, { "calibration/aurc": 0.21937393087793167, "calibration/batch_distribution_entropy": 0.7982473467783474, "calibration/buffer_distribution_entropy": 0.7814824550767334, "calibration/confidence_entropy": 0.3547674800413071, "calibration/coverage@0%": 0.010066104916730503, "calibration/coverage@1%": 0.010066104916730503, "calibration/coverage@10%": 0.1716406920783105, "calibration/coverage@15%": 0.31217969811753826, "calibration/coverage@20%": 0.51458351029786, "calibration/coverage@25%": 0.6728991822165153, "calibration/coverage@30%": 0.8798521895852263, "calibration/coverage@5%": 0.05594028905782202, "calibration/ece": 0.13592891891689865, "calibration/mean_confidence": 0.713633518271023, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015364583333333326, "completions/max_length": 3396.2, "completions/max_terminated_length": 3396.2, "completions/mean_length": 766.4459350585937, "completions/mean_terminated_length": 778.4260375976562, "completions/min_length": 0.0, "completions/min_terminated_length": 185.4, "epoch": 0.3119961000487494, "grad_norm": 0.0004305084585212171, "learning_rate": 2.349397590361446e-06, "loss": -0.0145, "num_tokens": 268694939.0, "reward": 1.0496493101119995, "reward_std": 0.1406361937522888, "rewards/accuracy_reward": 0.6756076455116272, "rewards/brier_reward": 0.800056254863739, "rewards/confidence_uniqueness_reward": 0.8977401375770568, "rewards/format_reward": 0.9846354126930237, "rewards/frontier_aurc_reward": -0.001901687984354794, "rewards/frontier_coverage_1": 0.0432504091411829, "rewards/frontier_coverage_10": 0.0432504091411829, "rewards/frontier_coverage_15": 0.0432504091411829, "rewards/frontier_coverage_20": 0.0432504091411829, "rewards/frontier_coverage_25": 0.0432504091411829, "rewards/frontier_coverage_5": 0.0432504091411829, "rewards/frontier_ece_reward": 0.02153747119009495, "signal/accuracy_reward/centered_abs_mean": 0.16565212607383728, "signal/accuracy_reward/group_std_mean": 0.21800636351108552, "signal/accuracy_reward/group_zero_std_frac": 0.37777777910232546, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08282606303691864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08282606303691864, "signal/advantage_abs_mean": 0.1018408328294754, "signal/advantage_pre_scale_abs_mean": 0.1018408328294754, "signal/advantage_pre_scale_std": 0.17831650972366334, "signal/advantage_std": 0.17831650972366334, "signal/brier_reward/centered_abs_mean": 0.15308336317539215, "signal/brier_reward/group_std_mean": 0.19842869639396668, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019135420396924018, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.019135420396924018, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0566624753177166, "signal/confidence_uniqueness_reward/group_std_mean": 0.08453233689069747, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007082809414714575, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007082809414714575, "signal/format_reward/centered_abs_mean": 0.0266004778444767, "signal/format_reward/group_std_mean": 0.049190875887870786, "signal/format_reward/group_zero_std_frac": 0.8055555820465088, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01330023892223835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01330023892223835, "signal/frontier_aurc_reward/centered_abs_mean": 0.002477661520242691, "signal/frontier_aurc_reward/group_std_mean": 0.0040058012586086985, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.435014052432962e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.435014052432962e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14277483373880387, "signal/frontier_coverage_1/group_std_mean": 0.19749794900417328, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025556694716215135, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025556694716215135, "signal/frontier_coverage_10/centered_abs_mean": 0.14277483373880387, "signal/frontier_coverage_10/group_std_mean": 0.19749794900417328, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025556694716215135, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025556694716215135, "signal/frontier_coverage_15/centered_abs_mean": 0.14277483373880387, "signal/frontier_coverage_15/group_std_mean": 0.19749794900417328, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025556694716215135, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025556694716215135, "signal/frontier_coverage_20/centered_abs_mean": 0.14277483373880387, "signal/frontier_coverage_20/group_std_mean": 0.19749794900417328, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025556694716215135, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025556694716215135, "signal/frontier_coverage_25/centered_abs_mean": 0.14277483373880387, "signal/frontier_coverage_25/group_std_mean": 0.19749794900417328, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025556694716215135, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025556694716215135, "signal/frontier_coverage_5/centered_abs_mean": 0.14277483373880387, "signal/frontier_coverage_5/group_std_mean": 0.19749794900417328, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025556694716215135, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025556694716215135, "signal/frontier_ece_reward/centered_abs_mean": 0.023040265217423438, "signal/frontier_ece_reward/group_std_mean": 0.028788076341152193, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0028800331521779297, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0028800331521779297, "step": 130 }, { "calibration/aurc": 0.21178649539996738, "calibration/batch_distribution_entropy": 0.8621768605224684, "calibration/buffer_distribution_entropy": 0.7861469496776874, "calibration/confidence_entropy": 0.41761171099367456, "calibration/coverage@0%": 0.011613063660477454, "calibration/coverage@1%": 0.10067556366047745, "calibration/coverage@10%": 0.26419098143236075, "calibration/coverage@15%": 0.37293739888807675, "calibration/coverage@20%": 0.548718324485379, "calibration/coverage@25%": 0.6227433237201573, "calibration/coverage@30%": 0.7633802420489727, "calibration/coverage@5%": 0.19650889699381077, "calibration/ece": 0.1498688341904781, "calibration/mean_confidence": 0.6908206350495991, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3772.4, "completions/max_terminated_length": 3772.4, "completions/mean_length": 758.24306640625, "completions/mean_terminated_length": 767.1976684570312, "completions/min_length": 0.0, "completions/min_terminated_length": 205.2, "epoch": 0.32399595005062437, "grad_norm": 0.00038663134910166264, "learning_rate": 2.1987951807228917e-06, "loss": -0.0098, "num_tokens": 280522923.0, "reward": 1.0594348669052125, "reward_std": 0.12770668268203736, "rewards/accuracy_reward": 0.6868055582046508, "rewards/brier_reward": 0.8069122552871704, "rewards/confidence_uniqueness_reward": 0.9251725912094116, "rewards/format_reward": 0.9881944537162781, "rewards/frontier_aurc_reward": -0.0012947394163347781, "rewards/frontier_coverage_1": 0.033542437851428984, "rewards/frontier_coverage_10": 0.033542437851428984, "rewards/frontier_coverage_15": 0.033542437851428984, "rewards/frontier_coverage_20": 0.033542437851428984, "rewards/frontier_coverage_25": 0.033542437851428984, "rewards/frontier_coverage_5": 0.033542437851428984, "rewards/frontier_ece_reward": 0.014759739115834235, "signal/accuracy_reward/centered_abs_mean": 0.15372178852558135, "signal/accuracy_reward/group_std_mean": 0.20507141947746277, "signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07686089426279068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07686089426279068, "signal/advantage_abs_mean": 0.09132870435714721, "signal/advantage_pre_scale_abs_mean": 0.09132870435714721, "signal/advantage_pre_scale_std": 0.16071320176124573, "signal/advantage_std": 0.16071320176124573, "signal/brier_reward/centered_abs_mean": 0.14716649651527405, "signal/brier_reward/group_std_mean": 0.19055280685424805, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018395812064409257, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018395812064409257, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.038749721646308896, "signal/confidence_uniqueness_reward/group_std_mean": 0.06294624656438827, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004843715205788612, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004843715205788612, "signal/format_reward/centered_abs_mean": 0.02109375, "signal/format_reward/group_std_mean": 0.042469137161970136, "signal/format_reward/group_zero_std_frac": 0.8166666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010546875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010546875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013480266556143762, "signal/frontier_aurc_reward/group_std_mean": 0.0022415920160710812, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4129677694872952e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4129677694872952e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16463718116283416, "signal/frontier_coverage_1/group_std_mean": 0.22296231091022492, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002947005443274975, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002947005443274975, "signal/frontier_coverage_10/centered_abs_mean": 0.16463718116283416, "signal/frontier_coverage_10/group_std_mean": 0.22296231091022492, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002947005443274975, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002947005443274975, "signal/frontier_coverage_15/centered_abs_mean": 0.16463718116283416, "signal/frontier_coverage_15/group_std_mean": 0.22296231091022492, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002947005443274975, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002947005443274975, "signal/frontier_coverage_20/centered_abs_mean": 0.16463718116283416, "signal/frontier_coverage_20/group_std_mean": 0.22296231091022492, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002947005443274975, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002947005443274975, "signal/frontier_coverage_25/centered_abs_mean": 0.16463718116283416, "signal/frontier_coverage_25/group_std_mean": 0.22296231091022492, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002947005443274975, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002947005443274975, "signal/frontier_coverage_5/centered_abs_mean": 0.16463718116283416, "signal/frontier_coverage_5/group_std_mean": 0.22296231091022492, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002947005443274975, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002947005443274975, "signal/frontier_ece_reward/centered_abs_mean": 0.019040508940815926, "signal/frontier_ece_reward/group_std_mean": 0.02431493140757084, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002380063617601991, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002380063617601991, "step": 135 }, { "calibration/aurc": 0.15735400009669226, "calibration/batch_distribution_entropy": 0.867213508049151, "calibration/buffer_distribution_entropy": 0.7960778689126401, "calibration/confidence_entropy": 0.43905768456117134, "calibration/coverage@0%": 0.031665669247906085, "calibration/coverage@1%": 0.031665669247906085, "calibration/coverage@10%": 0.36938394329841706, "calibration/coverage@15%": 0.577238562485273, "calibration/coverage@20%": 0.7212511045241308, "calibration/coverage@25%": 0.8655174528200844, "calibration/coverage@30%": 0.9547358513805883, "calibration/coverage@5%": 0.06221147821476769, "calibration/ece": 0.10685238076566583, "calibration/mean_confidence": 0.6762039937526211, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014236111111111093, "completions/max_length": 3602.8, "completions/max_terminated_length": 3602.8, "completions/mean_length": 773.11328125, "completions/mean_terminated_length": 784.2122314453125, "completions/min_length": 0.0, "completions/min_terminated_length": 239.6, "epoch": 0.33599580005249935, "grad_norm": 0.00038202741416171193, "learning_rate": 2.0481927710843377e-06, "loss": -0.0127, "num_tokens": 292533412.0, "reward": 1.0550405263900757, "reward_std": 0.12688146978616716, "rewards/accuracy_reward": 0.6841145992279053, "rewards/brier_reward": 0.8033087968826294, "rewards/confidence_uniqueness_reward": 0.9210085034370422, "rewards/format_reward": 0.9856770753860473, "rewards/frontier_aurc_reward": -0.0011435442487709225, "rewards/frontier_coverage_1": 0.028446093632373957, "rewards/frontier_coverage_10": 0.028446093632373957, "rewards/frontier_coverage_15": 0.028446093632373957, "rewards/frontier_coverage_20": 0.028446093632373957, "rewards/frontier_coverage_25": 0.027299534215126188, "rewards/frontier_coverage_5": 0.028446093632373957, "rewards/frontier_ece_reward": 0.012727185152471066, "signal/accuracy_reward/centered_abs_mean": 0.1436794728040695, "signal/accuracy_reward/group_std_mean": 0.19504518210887908, "signal/accuracy_reward/group_zero_std_frac": 0.4333333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07183973640203475, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07183973640203475, "signal/advantage_abs_mean": 0.09040538519620896, "signal/advantage_pre_scale_abs_mean": 0.09040538519620896, "signal/advantage_pre_scale_std": 0.1635303646326065, "signal/advantage_std": 0.1635303646326065, "signal/brier_reward/centered_abs_mean": 0.14187564551830292, "signal/brier_reward/group_std_mean": 0.18244777321815492, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017734455689787865, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017734455689787865, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.041977598518133166, "signal/confidence_uniqueness_reward/group_std_mean": 0.06911587193608285, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005247199814766646, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005247199814766646, "signal/format_reward/centered_abs_mean": 0.02453884594142437, "signal/format_reward/group_std_mean": 0.04877747595310211, "signal/format_reward/group_zero_std_frac": 0.7888888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012269422970712185, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012269422970712185, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011516727041453122, "signal/frontier_aurc_reward/group_std_mean": 0.001979802688583732, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0614940876839682e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0614940876839682e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1493788868188858, "signal/frontier_coverage_1/group_std_mean": 0.20525516271591188, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026738820131868126, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026738820131868126, "signal/frontier_coverage_10/centered_abs_mean": 0.1493788868188858, "signal/frontier_coverage_10/group_std_mean": 0.20525516271591188, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026738820131868126, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026738820131868126, "signal/frontier_coverage_15/centered_abs_mean": 0.1493788868188858, "signal/frontier_coverage_15/group_std_mean": 0.20525516271591188, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026738820131868126, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026738820131868126, "signal/frontier_coverage_20/centered_abs_mean": 0.1493788868188858, "signal/frontier_coverage_20/group_std_mean": 0.20525516271591188, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026738820131868126, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026738820131868126, "signal/frontier_coverage_25/centered_abs_mean": 0.14764404296875, "signal/frontier_coverage_25/group_std_mean": 0.20283401310443877, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002642828319221735, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002642828319221735, "signal/frontier_coverage_5/centered_abs_mean": 0.1493788868188858, "signal/frontier_coverage_5/group_std_mean": 0.20525516271591188, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026738820131868126, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026738820131868126, "signal/frontier_ece_reward/centered_abs_mean": 0.016555101424455643, "signal/frontier_ece_reward/group_std_mean": 0.021539781242609024, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020693876780569554, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020693876780569554, "step": 140 }, { "calibration/aurc": 0.17760189147420782, "calibration/batch_distribution_entropy": 0.8614810747703359, "calibration/buffer_distribution_entropy": 0.8124016039305744, "calibration/confidence_entropy": 0.4269929605650483, "calibration/coverage@0%": 0.04225979049059177, "calibration/coverage@1%": 0.04225979049059177, "calibration/coverage@10%": 0.29135738789953486, "calibration/coverage@15%": 0.5114959489018647, "calibration/coverage@20%": 0.6514218042908502, "calibration/coverage@25%": 0.7691949016487275, "calibration/coverage@30%": 0.8515852075218829, "calibration/coverage@5%": 0.07188663538059455, "calibration/ece": 0.11239056071129601, "calibration/mean_confidence": 0.6774057122905555, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014756944444444442, "completions/max_length": 3140.8, "completions/max_terminated_length": 3140.8, "completions/mean_length": 768.288916015625, "completions/mean_terminated_length": 779.8587036132812, "completions/min_length": 0.0, "completions/min_terminated_length": 228.8, "epoch": 0.34799565005437433, "grad_norm": 0.0003542336344253272, "learning_rate": 1.8975903614457832e-06, "loss": -0.0128, "num_tokens": 304448708.0, "reward": 1.0733104705810548, "reward_std": 0.12197275906801223, "rewards/accuracy_reward": 0.7184027671813965, "rewards/brier_reward": 0.8237447381019593, "rewards/confidence_uniqueness_reward": 0.9106087207794189, "rewards/format_reward": 0.98515625, "rewards/frontier_aurc_reward": -0.0011269306181930006, "rewards/frontier_coverage_1": 0.02765751425176859, "rewards/frontier_coverage_10": 0.02765751425176859, "rewards/frontier_coverage_15": 0.02765751425176859, "rewards/frontier_coverage_20": 0.02765751425176859, "rewards/frontier_coverage_25": 0.029555964469909667, "rewards/frontier_coverage_5": 0.02765751425176859, "rewards/frontier_ece_reward": 0.014020322076976299, "signal/accuracy_reward/centered_abs_mean": 0.1314453125, "signal/accuracy_reward/group_std_mean": 0.1816246747970581, "signal/accuracy_reward/group_zero_std_frac": 0.45277778506278993, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06572265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06572265625, "signal/advantage_abs_mean": 0.08338052183389663, "signal/advantage_pre_scale_abs_mean": 0.08338052183389663, "signal/advantage_pre_scale_std": 0.16272049248218537, "signal/advantage_std": 0.16272049248218537, "signal/brier_reward/centered_abs_mean": 0.12779132276773453, "signal/brier_reward/group_std_mean": 0.16802054941654204, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015973915345966816, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015973915345966816, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.046132729202508924, "signal/confidence_uniqueness_reward/group_std_mean": 0.07453691065311432, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0057665911503136155, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0057665911503136155, "signal/format_reward/centered_abs_mean": 0.02614474855363369, "signal/format_reward/group_std_mean": 0.05145877227187157, "signal/format_reward/group_zero_std_frac": 0.7833333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013072374276816845, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013072374276816845, "signal/frontier_aurc_reward/centered_abs_mean": 0.001297543675173074, "signal/frontier_aurc_reward/group_std_mean": 0.0022464465117082, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3226030680234545e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3226030680234545e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1275523856282234, "signal/frontier_coverage_1/group_std_mean": 0.17685183584690095, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022831874433904887, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022831874433904887, "signal/frontier_coverage_10/centered_abs_mean": 0.1275523856282234, "signal/frontier_coverage_10/group_std_mean": 0.17685183584690095, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022831874433904887, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022831874433904887, "signal/frontier_coverage_15/centered_abs_mean": 0.1275523856282234, "signal/frontier_coverage_15/group_std_mean": 0.17685183584690095, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022831874433904887, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022831874433904887, "signal/frontier_coverage_20/centered_abs_mean": 0.1275523856282234, "signal/frontier_coverage_20/group_std_mean": 0.17685183584690095, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022831874433904887, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022831874433904887, "signal/frontier_coverage_25/centered_abs_mean": 0.09737538546323776, "signal/frontier_coverage_25/group_std_mean": 0.13730859458446504, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017430193023756147, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017430193023756147, "signal/frontier_coverage_5/centered_abs_mean": 0.1275523856282234, "signal/frontier_coverage_5/group_std_mean": 0.17685183584690095, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022831874433904887, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022831874433904887, "signal/frontier_ece_reward/centered_abs_mean": 0.014937486127018928, "signal/frontier_ece_reward/group_std_mean": 0.01869678348302841, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001867185765877366, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001867185765877366, "step": 145 }, { "calibration/aurc": 0.1711177612994187, "calibration/batch_distribution_entropy": 0.7979294550533148, "calibration/buffer_distribution_entropy": 0.8252856931612047, "calibration/confidence_entropy": 0.3867997528995013, "calibration/coverage@0%": 0.028857090429138067, "calibration/coverage@1%": 0.028857090429138067, "calibration/coverage@10%": 0.39941504381293946, "calibration/coverage@15%": 0.4653468268344284, "calibration/coverage@20%": 0.6041595882856419, "calibration/coverage@25%": 0.7318645921120241, "calibration/coverage@30%": 0.8313806917894677, "calibration/coverage@5%": 0.20213240230711796, "calibration/ece": 0.14017483881446374, "calibration/mean_confidence": 0.7112044312383862, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666675, "completions/max_length": 3437.8, "completions/max_terminated_length": 3437.8, "completions/mean_length": 839.7661499023437, "completions/mean_terminated_length": 848.5783813476562, "completions/min_length": 0.0, "completions/min_terminated_length": 249.6, "epoch": 0.3599955000562493, "grad_norm": 0.00046212406596168876, "learning_rate": 1.7469879518072292e-06, "loss": -0.008, "num_tokens": 317233150.0, "reward": 1.065112328529358, "reward_std": 0.12633269131183625, "rewards/accuracy_reward": 0.70234375, "rewards/brier_reward": 0.8162197589874267, "rewards/confidence_uniqueness_reward": 0.8988808989524841, "rewards/format_reward": 0.9894965291023254, "rewards/frontier_aurc_reward": -0.0015927208121865987, "rewards/frontier_coverage_1": 0.03101687040179968, "rewards/frontier_coverage_10": 0.03101687040179968, "rewards/frontier_coverage_15": 0.03101687040179968, "rewards/frontier_coverage_20": 0.031029899418354035, "rewards/frontier_coverage_25": 0.0334394596517086, "rewards/frontier_coverage_5": 0.03101687040179968, "rewards/frontier_ece_reward": 0.011666352301836014, "signal/accuracy_reward/centered_abs_mean": 0.15368380695581435, "signal/accuracy_reward/group_std_mean": 0.1997191309928894, "signal/accuracy_reward/group_zero_std_frac": 0.43611111044883727, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07684190347790718, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07684190347790718, "signal/advantage_abs_mean": 0.09196306616067887, "signal/advantage_pre_scale_abs_mean": 0.09196306616067887, "signal/advantage_pre_scale_std": 0.165494641661644, "signal/advantage_std": 0.165494641661644, "signal/brier_reward/centered_abs_mean": 0.13116701394319535, "signal/brier_reward/group_std_mean": 0.17333021759986877, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01639587674289942, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01639587674289942, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.046421286463737485, "signal/confidence_uniqueness_reward/group_std_mean": 0.07029250860214234, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005802660807967186, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005802660807967186, "signal/format_reward/centered_abs_mean": 0.01837565079331398, "signal/format_reward/group_std_mean": 0.0379862654954195, "signal/format_reward/group_zero_std_frac": 0.8305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00918782539665699, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00918782539665699, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020536962430924175, "signal/frontier_aurc_reward/group_std_mean": 0.003437606617808342, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.67611584806582e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.67611584806582e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.11324008107185364, "signal/frontier_coverage_1/group_std_mean": 0.1635311007499695, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020269973436370494, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020269973436370494, "signal/frontier_coverage_10/centered_abs_mean": 0.11324008107185364, "signal/frontier_coverage_10/group_std_mean": 0.1635311007499695, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020269973436370494, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020269973436370494, "signal/frontier_coverage_15/centered_abs_mean": 0.11324008107185364, "signal/frontier_coverage_15/group_std_mean": 0.1635311007499695, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020269973436370494, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020269973436370494, "signal/frontier_coverage_20/centered_abs_mean": 0.10615915805101395, "signal/frontier_coverage_20/group_std_mean": 0.15414920151233674, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019002489047124983, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019002489047124983, "signal/frontier_coverage_25/centered_abs_mean": 0.060843870788812635, "signal/frontier_coverage_25/group_std_mean": 0.08873464614152908, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001089105277787894, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001089105277787894, "signal/frontier_coverage_5/centered_abs_mean": 0.11324008107185364, "signal/frontier_coverage_5/group_std_mean": 0.1635311007499695, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020269973436370494, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020269973436370494, "signal/frontier_ece_reward/centered_abs_mean": 0.01169421263039112, "signal/frontier_ece_reward/group_std_mean": 0.014634997583925724, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00146177657879889, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00146177657879889, "step": 150 }, { "epoch": 0.3599955000562493, "eval_calibration/aurc": 0.16175791904643544, "eval_calibration/batch_distribution_entropy": 0.7352031129148272, "eval_calibration/buffer_distribution_entropy": 0.832478781306993, "eval_calibration/confidence_entropy": 0.39508010082462275, "eval_calibration/coverage@0%": 0.19808467741935484, "eval_calibration/coverage@1%": 0.19808467741935484, "eval_calibration/coverage@10%": 0.47211021505376344, "eval_calibration/coverage@15%": 0.5816532258064516, "eval_calibration/coverage@20%": 0.7856182795698925, "eval_calibration/coverage@25%": 0.8489583333333334, "eval_calibration/coverage@30%": 0.9010416666666666, "eval_calibration/coverage@5%": 0.21370967741935484, "eval_calibration/ece": 0.16735424483653735, "eval_calibration/mean_confidence": 0.749357968833574, "eval_completions/clipped_ratio": 0.006944444444444457, "eval_completions/max_length": 2777.6666666666665, "eval_completions/max_terminated_length": 2777.6666666666665, "eval_completions/mean_length": 823.3238016764323, "eval_completions/mean_terminated_length": 829.1297912597656, "eval_completions/min_length": 111.33333333333333, "eval_completions/min_terminated_length": 290.0, "eval_loss": 0.0, "eval_num_tokens": 317233150.0, "eval_reward": 1.0572884281476338, "eval_reward_std": 0.25147593518098194, "eval_rewards/accuracy_reward": 0.6996527711550394, "eval_rewards/brier_reward": 0.8065233925978342, "eval_rewards/confidence_uniqueness_reward": 0.8552062610785166, "eval_rewards/format_reward": 0.9921875099341074, "eval_rewards/frontier_aurc_reward": -0.00195368086375917, "eval_rewards/frontier_coverage_1": 0.021662883625443403, "eval_rewards/frontier_coverage_10": 0.021662883625443403, "eval_rewards/frontier_coverage_15": 0.021662883625443403, "eval_rewards/frontier_coverage_20": 0.02181592263514176, "eval_rewards/frontier_coverage_25": 0.035129744869967304, "eval_rewards/frontier_coverage_5": 0.021662883625443403, "eval_rewards/frontier_ece_reward": 0.008933214703574777, "eval_runtime": 198.6148, "eval_samples_per_second": 5.035, "eval_signal/accuracy_reward/centered_abs_mean": 0.4021267344554265, "eval_signal/accuracy_reward/group_std_mean": 0.4533983866373698, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20106336722771326, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20106336722771326, "eval_signal/advantage_abs_mean": 0.2142608513434728, "eval_signal/advantage_pre_scale_abs_mean": 0.2142608513434728, "eval_signal/advantage_pre_scale_std": 0.2503946051001549, "eval_signal/advantage_std": 0.2503946051001549, "eval_signal/brier_reward/centered_abs_mean": 0.22845095644394556, "eval_signal/brier_reward/group_std_mean": 0.28837570548057556, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028556369555493195, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.028556369555493195, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06686499528586864, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09637108817696571, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00835812441073358, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00835812441073358, "eval_signal/format_reward/centered_abs_mean": 0.015136718594779571, "eval_signal/format_reward/group_std_mean": 0.04419417337824901, "eval_signal/format_reward/group_zero_std_frac": 0.750000019868215, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.007568359297389786, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359297389786, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003255114386168619, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0065320210220913095, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.82665494827476e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.82665494827476e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.15939685453971228, "eval_signal/frontier_coverage_1/group_std_mean": 0.28508878250916797, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028532035648822784, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028532035648822784, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.15939685453971228, "eval_signal/frontier_coverage_10/group_std_mean": 0.28508878250916797, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028532035648822784, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028532035648822784, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.15939685453971228, "eval_signal/frontier_coverage_15/group_std_mean": 0.28508878250916797, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028532035648822784, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028532035648822784, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.12750840187072754, "eval_signal/frontier_coverage_20/group_std_mean": 0.23698227355877557, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022824003632801273, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022824003632801273, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.07293465360999107, "eval_signal/frontier_coverage_25/group_std_mean": 0.12300009404619534, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013055303134024143, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013055303134024143, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.15939685453971228, "eval_signal/frontier_coverage_5/group_std_mean": 0.28508878250916797, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028532035648822784, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028532035648822784, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.013653319949905077, "eval_signal/frontier_ece_reward/group_std_mean": 0.017306591384112835, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017066649937381346, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017066649937381346, "eval_steps_per_second": 0.03, "step": 150 }, { "calibration/aurc": 0.15804363675783265, "calibration/batch_distribution_entropy": 0.8399256406752158, "calibration/buffer_distribution_entropy": 0.8364387748440253, "calibration/confidence_entropy": 0.4143819803032832, "calibration/coverage@0%": 0.014081538294168843, "calibration/coverage@1%": 0.014081538294168843, "calibration/coverage@10%": 0.4197884627652546, "calibration/coverage@15%": 0.5361398299340652, "calibration/coverage@20%": 0.6640488229008608, "calibration/coverage@25%": 0.8362543516100958, "calibration/coverage@30%": 0.9181149369016536, "calibration/coverage@5%": 0.0701024807025458, "calibration/ece": 0.10372717760673841, "calibration/mean_confidence": 0.7007285385392891, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007725694444444442, "completions/max_length": 3224.8, "completions/max_terminated_length": 3224.8, "completions/mean_length": 815.9788208007812, "completions/mean_terminated_length": 822.4533203125, "completions/min_length": 0.0, "completions/min_terminated_length": 248.4, "epoch": 0.3719953500581243, "grad_norm": 0.0004315991827752441, "learning_rate": 1.5963855421686747e-06, "loss": -0.0049, "num_tokens": 329740938.0, "reward": 1.0920594453811645, "reward_std": 0.11862210929393768, "rewards/accuracy_reward": 0.7427083373069763, "rewards/brier_reward": 0.8469986200332642, "rewards/confidence_uniqueness_reward": 0.9098951697349549, "rewards/format_reward": 0.9921874880790711, "rewards/frontier_aurc_reward": -0.001068349787965417, "rewards/frontier_coverage_1": 0.032322213798761365, "rewards/frontier_coverage_10": 0.032322213798761365, "rewards/frontier_coverage_15": 0.032322213798761365, "rewards/frontier_coverage_20": 0.033581113815307616, "rewards/frontier_coverage_25": 0.053568636626005174, "rewards/frontier_coverage_5": 0.032322213798761365, "rewards/frontier_ece_reward": 0.00915743401274085, "signal/accuracy_reward/centered_abs_mean": 0.15378689169883727, "signal/accuracy_reward/group_std_mean": 0.2032044380903244, "signal/accuracy_reward/group_zero_std_frac": 0.4138888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07689344584941864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07689344584941864, "signal/advantage_abs_mean": 0.08521311953663827, "signal/advantage_pre_scale_abs_mean": 0.08521311953663827, "signal/advantage_pre_scale_std": 0.15650778114795685, "signal/advantage_std": 0.15650778114795685, "signal/brier_reward/centered_abs_mean": 0.11616129875183105, "signal/brier_reward/group_std_mean": 0.1569095641374588, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014520162343978881, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014520162343978881, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03967732265591621, "signal/confidence_uniqueness_reward/group_std_mean": 0.05846917554736138, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004959665331989526, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004959665331989526, "signal/format_reward/centered_abs_mean": 0.013953992887400091, "signal/format_reward/group_std_mean": 0.02798333503305912, "signal/format_reward/group_zero_std_frac": 0.8805555462837219, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0069769964437000455, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0069769964437000455, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014973450219258667, "signal/frontier_aurc_reward/group_std_mean": 0.0027009368874132632, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6802473803400063e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6802473803400063e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12041537314653397, "signal/frontier_coverage_1/group_std_mean": 0.1706594407558441, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021554350852966307, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021554350852966307, "signal/frontier_coverage_10/centered_abs_mean": 0.12041537314653397, "signal/frontier_coverage_10/group_std_mean": 0.1706594407558441, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021554350852966307, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021554350852966307, "signal/frontier_coverage_15/centered_abs_mean": 0.12041537314653397, "signal/frontier_coverage_15/group_std_mean": 0.1706594407558441, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021554350852966307, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021554350852966307, "signal/frontier_coverage_20/centered_abs_mean": 0.08516337871551513, "signal/frontier_coverage_20/group_std_mean": 0.12315509170293808, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015244244365021586, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015244244365021586, "signal/frontier_coverage_25/centered_abs_mean": 0.05519420728087425, "signal/frontier_coverage_25/group_std_mean": 0.07446658313274383, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009879762423224748, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009879762423224748, "signal/frontier_coverage_5/centered_abs_mean": 0.12041537314653397, "signal/frontier_coverage_5/group_std_mean": 0.1706594407558441, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021554350852966307, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021554350852966307, "signal/frontier_ece_reward/centered_abs_mean": 0.008705221116542816, "signal/frontier_ece_reward/group_std_mean": 0.011256015487015247, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001088152639567852, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001088152639567852, "step": 155 }, { "calibration/aurc": 0.1396131852381405, "calibration/batch_distribution_entropy": 0.8001701977514258, "calibration/buffer_distribution_entropy": 0.8432206775920253, "calibration/confidence_entropy": 0.42837628908071357, "calibration/coverage@0%": 0.038418853892540475, "calibration/coverage@1%": 0.038418853892540475, "calibration/coverage@10%": 0.6409266970960997, "calibration/coverage@15%": 0.7230105375149384, "calibration/coverage@20%": 0.7985909217488165, "calibration/coverage@25%": 0.8646003898635477, "calibration/coverage@30%": 0.8846560846560847, "calibration/coverage@5%": 0.18981191832059213, "calibration/ece": 0.12949856679199157, "calibration/mean_confidence": 0.7311427370325003, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010590277777777768, "completions/max_length": 3660.8, "completions/max_terminated_length": 3660.8, "completions/mean_length": 833.2264892578125, "completions/mean_terminated_length": 842.1909423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 230.8, "epoch": 0.38399520005999926, "grad_norm": 0.00039733736775815487, "learning_rate": 1.4457831325301204e-06, "loss": -0.0093, "num_tokens": 342427003.0, "reward": 1.054310917854309, "reward_std": 0.12334007620811463, "rewards/accuracy_reward": 0.6795138835906982, "rewards/brier_reward": 0.8059031248092652, "rewards/confidence_uniqueness_reward": 0.9150711178779602, "rewards/format_reward": 0.9894097328186036, "rewards/frontier_aurc_reward": -0.0016405290691182018, "rewards/frontier_coverage_1": 0.03431166112422943, "rewards/frontier_coverage_10": 0.03431166112422943, "rewards/frontier_coverage_15": 0.03431166112422943, "rewards/frontier_coverage_20": 0.031892279908061025, "rewards/frontier_coverage_25": 0.05291588976979256, "rewards/frontier_coverage_5": 0.03431166112422943, "rewards/frontier_ece_reward": 0.006254972610622645, "signal/accuracy_reward/centered_abs_mean": 0.15179036557674408, "signal/accuracy_reward/group_std_mean": 0.19949381947517394, "signal/accuracy_reward/group_zero_std_frac": 0.43611111044883727, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07589518278837204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07589518278837204, "signal/advantage_abs_mean": 0.08981405347585678, "signal/advantage_pre_scale_abs_mean": 0.08981405347585678, "signal/advantage_pre_scale_std": 0.1621391087770462, "signal/advantage_std": 0.1621391087770462, "signal/brier_reward/centered_abs_mean": 0.133778178691864, "signal/brier_reward/group_std_mean": 0.17374806702136994, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016722272336483, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016722272336483, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03996127396821976, "signal/confidence_uniqueness_reward/group_std_mean": 0.06208924725651741, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00499515924602747, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00499515924602747, "signal/format_reward/centered_abs_mean": 0.01856553815305233, "signal/format_reward/group_std_mean": 0.037125248461961746, "signal/format_reward/group_zero_std_frac": 0.8388888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009282769076526165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009282769076526165, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018948239739984274, "signal/frontier_aurc_reward/group_std_mean": 0.0033697550650686027, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.39173486281652e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.39173486281652e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1356187418103218, "signal/frontier_coverage_1/group_std_mean": 0.18712888658046722, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002427575411275029, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002427575411275029, "signal/frontier_coverage_10/centered_abs_mean": 0.1356187418103218, "signal/frontier_coverage_10/group_std_mean": 0.18712888658046722, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002427575411275029, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002427575411275029, "signal/frontier_coverage_15/centered_abs_mean": 0.1356187418103218, "signal/frontier_coverage_15/group_std_mean": 0.18712888658046722, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002427575411275029, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002427575411275029, "signal/frontier_coverage_20/centered_abs_mean": 0.08282427489757538, "signal/frontier_coverage_20/group_std_mean": 0.1169714629650116, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014825545251369477, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014825545251369477, "signal/frontier_coverage_25/centered_abs_mean": 0.060303305834531785, "signal/frontier_coverage_25/group_std_mean": 0.07883718758821487, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010794291738420725, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010794291738420725, "signal/frontier_coverage_5/centered_abs_mean": 0.1356187418103218, "signal/frontier_coverage_5/group_std_mean": 0.18712888658046722, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002427575411275029, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002427575411275029, "signal/frontier_ece_reward/centered_abs_mean": 0.008292005583643913, "signal/frontier_ece_reward/group_std_mean": 0.010889817215502261, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001036500697955489, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001036500697955489, "step": 160 }, { "calibration/aurc": 0.1401170386334239, "calibration/batch_distribution_entropy": 0.8628322122419168, "calibration/buffer_distribution_entropy": 0.8477875709184838, "calibration/confidence_entropy": 0.41931734632897494, "calibration/coverage@0%": 0.04187046632642211, "calibration/coverage@1%": 0.04187046632642211, "calibration/coverage@10%": 0.5120449317449931, "calibration/coverage@15%": 0.6430785094700009, "calibration/coverage@20%": 0.7207332569334908, "calibration/coverage@25%": 0.8257768317559318, "calibration/coverage@30%": 0.9018360536714092, "calibration/coverage@5%": 0.29195404140585296, "calibration/ece": 0.12538457953207308, "calibration/mean_confidence": 0.6443893716018676, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008680555555555558, "completions/max_length": 3490.6, "completions/max_terminated_length": 3490.6, "completions/mean_length": 873.49384765625, "completions/mean_terminated_length": 881.2654296875, "completions/min_length": 0.0, "completions/min_terminated_length": 242.8, "epoch": 0.39599505006187424, "grad_norm": 0.0004413281276356429, "learning_rate": 1.2951807228915664e-06, "loss": -0.007, "num_tokens": 355628724.0, "reward": 1.0522673010826111, "reward_std": 0.11457638144493103, "rewards/accuracy_reward": 0.668836796283722, "rewards/brier_reward": 0.8068422317504883, "rewards/confidence_uniqueness_reward": 0.9226135849952698, "rewards/format_reward": 0.9913194417953491, "rewards/frontier_aurc_reward": -0.0013329184614121914, "rewards/frontier_coverage_1": 0.04676450602710247, "rewards/frontier_coverage_10": 0.04676450602710247, "rewards/frontier_coverage_15": 0.04676450602710247, "rewards/frontier_coverage_20": 0.040921327844262126, "rewards/frontier_coverage_25": 0.06650637164711952, "rewards/frontier_coverage_5": 0.04676450602710247, "rewards/frontier_ece_reward": 0.006077949050813913, "signal/accuracy_reward/centered_abs_mean": 0.1448296457529068, "signal/accuracy_reward/group_std_mean": 0.1931760638952255, "signal/accuracy_reward/group_zero_std_frac": 0.4388888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0724148228764534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0724148228764534, "signal/advantage_abs_mean": 0.08313089311122894, "signal/advantage_pre_scale_abs_mean": 0.08313089311122894, "signal/advantage_pre_scale_std": 0.15007005333900453, "signal/advantage_std": 0.15007005333900453, "signal/brier_reward/centered_abs_mean": 0.13480945378541948, "signal/brier_reward/group_std_mean": 0.17508352398872376, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016851181723177434, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016851181723177434, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03558523468673229, "signal/confidence_uniqueness_reward/group_std_mean": 0.05330366343259811, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004448154335841536, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004448154335841536, "signal/format_reward/centered_abs_mean": 0.014876301772892475, "signal/format_reward/group_std_mean": 0.028501024469733238, "signal/format_reward/group_zero_std_frac": 0.8805555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007438150886446238, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007438150886446238, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014591423678211868, "signal/frontier_aurc_reward/group_std_mean": 0.0024731668643653395, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6118645473616196e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6118645473616196e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.155775585770607, "signal/frontier_coverage_1/group_std_mean": 0.20951978862285614, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027883827686309816, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027883827686309816, "signal/frontier_coverage_10/centered_abs_mean": 0.155775585770607, "signal/frontier_coverage_10/group_std_mean": 0.20951978862285614, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027883827686309816, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027883827686309816, "signal/frontier_coverage_15/centered_abs_mean": 0.155775585770607, "signal/frontier_coverage_15/group_std_mean": 0.20951978862285614, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027883827686309816, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027883827686309816, "signal/frontier_coverage_20/centered_abs_mean": 0.09291831254959107, "signal/frontier_coverage_20/group_std_mean": 0.12665492296218872, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016632377402856946, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016632377402856946, "signal/frontier_coverage_25/centered_abs_mean": 0.06688660979270936, "signal/frontier_coverage_25/group_std_mean": 0.08502381294965744, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011972703039646148, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011972703039646148, "signal/frontier_coverage_5/centered_abs_mean": 0.155775585770607, "signal/frontier_coverage_5/group_std_mean": 0.20951978862285614, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027883827686309816, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027883827686309816, "signal/frontier_ece_reward/centered_abs_mean": 0.009020310081541538, "signal/frontier_ece_reward/group_std_mean": 0.01170970220118761, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011275387601926922, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011275387601926922, "step": 165 }, { "calibration/aurc": 0.10870616500791086, "calibration/batch_distribution_entropy": 0.7903104452510206, "calibration/buffer_distribution_entropy": 0.8493497664534088, "calibration/confidence_entropy": 0.39392810200666045, "calibration/coverage@0%": 0.039693163843954195, "calibration/coverage@1%": 0.15010983051062085, "calibration/coverage@10%": 0.6249913825702296, "calibration/coverage@15%": 0.7163854370584731, "calibration/coverage@20%": 0.8282437601072432, "calibration/coverage@25%": 0.9081739226033421, "calibration/coverage@30%": 0.9605802000879506, "calibration/coverage@5%": 0.4011407911307888, "calibration/ece": 0.08378010703101495, "calibration/mean_confidence": 0.7437848913758066, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008506944444444465, "completions/max_length": 3674.0, "completions/max_terminated_length": 3674.0, "completions/mean_length": 838.0208374023438, "completions/mean_terminated_length": 845.2309448242188, "completions/min_length": 0.0, "completions/min_terminated_length": 239.8, "epoch": 0.4079949000637492, "grad_norm": 0.0004973475588485599, "learning_rate": 1.1445783132530121e-06, "loss": -0.0069, "num_tokens": 368371908.0, "reward": 1.0795949220657348, "reward_std": 0.11285789757966995, "rewards/accuracy_reward": 0.7223958373069763, "rewards/brier_reward": 0.8292520999908447, "rewards/confidence_uniqueness_reward": 0.9112018942832947, "rewards/format_reward": 0.9914930462837219, "rewards/frontier_aurc_reward": -0.001215806626714766, "rewards/frontier_coverage_1": 0.028353986889123918, "rewards/frontier_coverage_10": 0.028353986889123918, "rewards/frontier_coverage_15": 0.028459986671805382, "rewards/frontier_coverage_20": 0.033776380494236945, "rewards/frontier_coverage_25": 0.0912104532122612, "rewards/frontier_coverage_5": 0.028353986889123918, "rewards/frontier_ece_reward": 0.006769264675676822, "signal/accuracy_reward/centered_abs_mean": 0.1389865458011627, "signal/accuracy_reward/group_std_mean": 0.1862527459859848, "signal/accuracy_reward/group_zero_std_frac": 0.45555556416511533, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06949327290058135, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06949327290058135, "signal/advantage_abs_mean": 0.08184980154037476, "signal/advantage_pre_scale_abs_mean": 0.08184980154037476, "signal/advantage_pre_scale_std": 0.15188938081264497, "signal/advantage_std": 0.15188938081264497, "signal/brier_reward/centered_abs_mean": 0.12720216065645218, "signal/brier_reward/group_std_mean": 0.1649218052625656, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015900270082056522, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015900270082056522, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03929706513881683, "signal/confidence_uniqueness_reward/group_std_mean": 0.05663086473941803, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004912133142352104, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004912133142352104, "signal/format_reward/centered_abs_mean": 0.014507378544658422, "signal/format_reward/group_std_mean": 0.0268052663654089, "signal/format_reward/group_zero_std_frac": 0.8916666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007253689272329211, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007253689272329211, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016262418590486049, "signal/frontier_aurc_reward/group_std_mean": 0.0029089401010423898, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.910972725658212e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.910972725658212e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13412159085273742, "signal/frontier_coverage_1/group_std_mean": 0.1826484888792038, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024007763247936966, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024007763247936966, "signal/frontier_coverage_10/centered_abs_mean": 0.13412159085273742, "signal/frontier_coverage_10/group_std_mean": 0.1826484888792038, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024007763247936966, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024007763247936966, "signal/frontier_coverage_15/centered_abs_mean": 0.13297670781612397, "signal/frontier_coverage_15/group_std_mean": 0.1811499148607254, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023802829906344413, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023802829906344413, "signal/frontier_coverage_20/centered_abs_mean": 0.07466747760772705, "signal/frontier_coverage_20/group_std_mean": 0.10277672857046127, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013365477789193392, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013365477789193392, "signal/frontier_coverage_25/centered_abs_mean": 0.0674952432513237, "signal/frontier_coverage_25/group_std_mean": 0.08588269799947738, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012081648223102094, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012081648223102094, "signal/frontier_coverage_5/centered_abs_mean": 0.13412159085273742, "signal/frontier_coverage_5/group_std_mean": 0.1826484888792038, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024007763247936966, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024007763247936966, "signal/frontier_ece_reward/centered_abs_mean": 0.008575642108917236, "signal/frontier_ece_reward/group_std_mean": 0.011006982997059822, "signal/frontier_ece_reward/group_zero_std_frac": 0.00555555559694767, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010719552636146545, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010719552636146545, "step": 170 }, { "calibration/aurc": 0.11067553128722692, "calibration/batch_distribution_entropy": 0.8481618438645689, "calibration/buffer_distribution_entropy": 0.8483553201550436, "calibration/confidence_entropy": 0.40064739663776583, "calibration/coverage@0%": 0.03877597911227154, "calibration/coverage@1%": 0.1033262436625361, "calibration/coverage@10%": 0.5340208553130162, "calibration/coverage@15%": 0.7327274043747142, "calibration/coverage@20%": 0.8668559271692431, "calibration/coverage@25%": 0.9450903394255874, "calibration/coverage@30%": 0.9826005221932114, "calibration/coverage@5%": 0.30878133853807876, "calibration/ece": 0.0929162421597211, "calibration/mean_confidence": 0.6861792539951647, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00859375, "completions/max_length": 3590.6, "completions/max_terminated_length": 3590.6, "completions/mean_length": 870.5375122070312, "completions/mean_terminated_length": 878.083056640625, "completions/min_length": 0.0, "completions/min_terminated_length": 246.8, "epoch": 0.4199947500656242, "grad_norm": 0.00044883930240757763, "learning_rate": 9.93975903614458e-07, "loss": -0.0071, "num_tokens": 381508468.0, "reward": 1.0745736122131349, "reward_std": 0.11894840151071548, "rewards/accuracy_reward": 0.7121527671813965, "rewards/brier_reward": 0.8242484927177429, "rewards/confidence_uniqueness_reward": 0.9156982779502869, "rewards/format_reward": 0.9913194417953491, "rewards/frontier_aurc_reward": -0.0012379781110212207, "rewards/frontier_coverage_1": 0.030451742745935918, "rewards/frontier_coverage_10": 0.030451742745935918, "rewards/frontier_coverage_15": 0.030320987850427628, "rewards/frontier_coverage_20": 0.03499968759715557, "rewards/frontier_coverage_25": 0.09922654330730438, "rewards/frontier_coverage_5": 0.030451742745935918, "rewards/frontier_ece_reward": 0.00628520967438817, "signal/accuracy_reward/centered_abs_mean": 0.14856770932674407, "signal/accuracy_reward/group_std_mean": 0.2042417496442795, "signal/accuracy_reward/group_zero_std_frac": 0.3944444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07428385466337203, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07428385466337203, "signal/advantage_abs_mean": 0.08372878432273864, "signal/advantage_pre_scale_abs_mean": 0.08372878432273864, "signal/advantage_pre_scale_std": 0.1532825142145157, "signal/advantage_std": 0.1532825142145157, "signal/brier_reward/centered_abs_mean": 0.12630099207162857, "signal/brier_reward/group_std_mean": 0.16852413713932038, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01578762400895357, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01578762400895357, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.037941998615860936, "signal/confidence_uniqueness_reward/group_std_mean": 0.055057863146066664, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004742749826982617, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004742749826982617, "signal/format_reward/centered_abs_mean": 0.014756944379769266, "signal/format_reward/group_std_mean": 0.026993418857455254, "signal/format_reward/group_zero_std_frac": 0.8916666626930236, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007378472189884633, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007378472189884633, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017084946855902671, "signal/frontier_aurc_reward/group_std_mean": 0.0031451730988919734, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.058205293200444e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.058205293200444e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14232682287693024, "signal/frontier_coverage_1/group_std_mean": 0.19747399687767028, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025476500391960143, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025476500391960143, "signal/frontier_coverage_10/centered_abs_mean": 0.14232682287693024, "signal/frontier_coverage_10/group_std_mean": 0.19747399687767028, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025476500391960143, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025476500391960143, "signal/frontier_coverage_15/centered_abs_mean": 0.14066008031368255, "signal/frontier_coverage_15/group_std_mean": 0.19527221620082855, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002517815353348851, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002517815353348851, "signal/frontier_coverage_20/centered_abs_mean": 0.07579737156629562, "signal/frontier_coverage_20/group_std_mean": 0.10598736554384232, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013567729154601693, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013567729154601693, "signal/frontier_coverage_25/centered_abs_mean": 0.07291418462991714, "signal/frontier_coverage_25/group_std_mean": 0.09304469972848892, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013051638146862389, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013051638146862389, "signal/frontier_coverage_5/centered_abs_mean": 0.14232682287693024, "signal/frontier_coverage_5/group_std_mean": 0.19747399687767028, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025476500391960143, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025476500391960143, "signal/frontier_ece_reward/centered_abs_mean": 0.00866670086979866, "signal/frontier_ece_reward/group_std_mean": 0.011408805288374424, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010833376087248324, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010833376087248324, "step": 175 }, { "calibration/aurc": 0.0961958541907562, "calibration/batch_distribution_entropy": 0.8477025009746469, "calibration/buffer_distribution_entropy": 0.8498138843428966, "calibration/confidence_entropy": 0.4101348990305955, "calibration/coverage@0%": 0.088780644337697, "calibration/coverage@1%": 0.088780644337697, "calibration/coverage@10%": 0.5502983060324453, "calibration/coverage@15%": 0.807155930454841, "calibration/coverage@20%": 0.916858509725483, "calibration/coverage@25%": 0.9773861186549823, "calibration/coverage@30%": 0.9979057591623036, "calibration/coverage@5%": 0.355199305315759, "calibration/ece": 0.08873742643457942, "calibration/mean_confidence": 0.7072252329346721, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009548611111111138, "completions/max_length": 3770.4, "completions/max_terminated_length": 3770.4, "completions/mean_length": 837.043408203125, "completions/mean_terminated_length": 845.1139404296875, "completions/min_length": 0.0, "completions/min_terminated_length": 270.4, "epoch": 0.4319946000674992, "grad_norm": 0.00045111271901987493, "learning_rate": 8.433734939759036e-07, "loss": -0.0074, "num_tokens": 394251176.0, "reward": 1.0717910051345825, "reward_std": 0.12240722179412841, "rewards/accuracy_reward": 0.7144965171813965, "rewards/brier_reward": 0.807321059703827, "rewards/confidence_uniqueness_reward": 0.9133202791213989, "rewards/format_reward": 0.9904513955116272, "rewards/frontier_aurc_reward": -0.0018045842181891203, "rewards/frontier_coverage_1": 0.016695484053343534, "rewards/frontier_coverage_10": 0.016695484053343534, "rewards/frontier_coverage_15": 0.017037773295305668, "rewards/frontier_coverage_20": 0.028835199400782587, "rewards/frontier_coverage_25": 0.10415665209293365, "rewards/frontier_coverage_5": 0.016695484053343534, "rewards/frontier_ece_reward": 0.00549684651196003, "signal/accuracy_reward/centered_abs_mean": 0.15064561367034912, "signal/accuracy_reward/group_std_mean": 0.2003714770078659, "signal/accuracy_reward/group_zero_std_frac": 0.4194444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07532280683517456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07532280683517456, "signal/advantage_abs_mean": 0.08792258501052856, "signal/advantage_pre_scale_abs_mean": 0.08792258501052856, "signal/advantage_pre_scale_std": 0.15989961624145507, "signal/advantage_std": 0.15989961624145507, "signal/brier_reward/centered_abs_mean": 0.13230671286582946, "signal/brier_reward/group_std_mean": 0.17412598431110382, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016538339108228682, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016538339108228682, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.040982935577631, "signal/confidence_uniqueness_reward/group_std_mean": 0.06092101261019707, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005122866947203875, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005122866947203875, "signal/format_reward/centered_abs_mean": 0.01662326380610466, "signal/format_reward/group_std_mean": 0.03184187039732933, "signal/format_reward/group_zero_std_frac": 0.8666666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00831163190305233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00831163190305233, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023748093051835896, "signal/frontier_aurc_reward/group_std_mean": 0.004061697609722614, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2509083868935704e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2509083868935704e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13885007798671722, "signal/frontier_coverage_1/group_std_mean": 0.18973132073879242, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024854163639247417, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024854163639247417, "signal/frontier_coverage_10/centered_abs_mean": 0.13885007798671722, "signal/frontier_coverage_10/group_std_mean": 0.18973132073879242, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024854163639247417, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024854163639247417, "signal/frontier_coverage_15/centered_abs_mean": 0.13509701192378998, "signal/frontier_coverage_15/group_std_mean": 0.18487387001514435, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002418236620724201, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002418236620724201, "signal/frontier_coverage_20/centered_abs_mean": 0.06975524574518203, "signal/frontier_coverage_20/group_std_mean": 0.095186148583889, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001248618890531361, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001248618890531361, "signal/frontier_coverage_25/centered_abs_mean": 0.0811617761850357, "signal/frontier_coverage_25/group_std_mean": 0.103651861846447, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014527957886457444, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014527957886457444, "signal/frontier_coverage_5/centered_abs_mean": 0.13885007798671722, "signal/frontier_coverage_5/group_std_mean": 0.18973132073879242, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024854163639247417, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024854163639247417, "signal/frontier_ece_reward/centered_abs_mean": 0.008750239573419093, "signal/frontier_ece_reward/group_std_mean": 0.011527445912361146, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010937799466773867, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010937799466773867, "step": 180 }, { "calibration/aurc": 0.18495376889754953, "calibration/batch_distribution_entropy": 0.8803191163454155, "calibration/buffer_distribution_entropy": 0.851482679855412, "calibration/confidence_entropy": 0.4354021479497231, "calibration/coverage@0%": 0.012587749815716776, "calibration/coverage@1%": 0.012587749815716776, "calibration/coverage@10%": 0.1918354407163473, "calibration/coverage@15%": 0.36826092817701916, "calibration/coverage@20%": 0.6374964095137642, "calibration/coverage@25%": 0.8755843999038427, "calibration/coverage@30%": 0.9279373368146213, "calibration/coverage@5%": 0.06898461665644785, "calibration/ece": 0.14425179951501588, "calibration/mean_confidence": 0.6684984209096279, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009461805555555558, "completions/max_length": 3679.8, "completions/max_terminated_length": 3679.8, "completions/mean_length": 818.1920043945313, "completions/mean_terminated_length": 826.032080078125, "completions/min_length": 0.0, "completions/min_terminated_length": 217.8, "epoch": 0.44399445006937416, "grad_norm": 0.0004495714674703777, "learning_rate": 6.927710843373495e-07, "loss": -0.0085, "num_tokens": 406766796.0, "reward": 1.0636092185974122, "reward_std": 0.12089930176734924, "rewards/accuracy_reward": 0.6918402791023255, "rewards/brier_reward": 0.8123436450958252, "rewards/confidence_uniqueness_reward": 0.925143015384674, "rewards/format_reward": 0.9905382037162781, "rewards/frontier_aurc_reward": -0.0011275873170234264, "rewards/frontier_coverage_1": 0.027901495201513173, "rewards/frontier_coverage_10": 0.027901495201513173, "rewards/frontier_coverage_15": 0.02855590097606182, "rewards/frontier_coverage_20": 0.0363032516092062, "rewards/frontier_coverage_25": 0.1104502335190773, "rewards/frontier_coverage_5": 0.027901495201513173, "rewards/frontier_ece_reward": 0.00494370711967349, "signal/accuracy_reward/centered_abs_mean": 0.15333116352558135, "signal/accuracy_reward/group_std_mean": 0.20526001155376433, "signal/accuracy_reward/group_zero_std_frac": 0.40833333134651184, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07666558176279067, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07666558176279067, "signal/advantage_abs_mean": 0.08671480715274811, "signal/advantage_pre_scale_abs_mean": 0.08671480715274811, "signal/advantage_pre_scale_std": 0.15482214391231536, "signal/advantage_std": 0.15482214391231536, "signal/brier_reward/centered_abs_mean": 0.1261175572872162, "signal/brier_reward/group_std_mean": 0.164833265542984, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015764694660902023, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015764694660902023, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03562588319182396, "signal/confidence_uniqueness_reward/group_std_mean": 0.055293154716491696, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004453235398977995, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004453235398977995, "signal/format_reward/centered_abs_mean": 0.016520182229578496, "signal/format_reward/group_std_mean": 0.032335417345166206, "signal/format_reward/group_zero_std_frac": 0.8638888835906983, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008260091114789248, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008260091114789248, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013634230359457432, "signal/frontier_aurc_reward/group_std_mean": 0.0024055395508185027, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4405272415606306e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4405272415606306e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15479598343372344, "signal/frontier_coverage_1/group_std_mean": 0.20758814811706544, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002770848013460636, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002770848013460636, "signal/frontier_coverage_10/centered_abs_mean": 0.15479598343372344, "signal/frontier_coverage_10/group_std_mean": 0.20758814811706544, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002770848013460636, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002770848013460636, "signal/frontier_coverage_15/centered_abs_mean": 0.14861891269683838, "signal/frontier_coverage_15/group_std_mean": 0.19950321912765503, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002660278417170048, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002660278417170048, "signal/frontier_coverage_20/centered_abs_mean": 0.07016772180795669, "signal/frontier_coverage_20/group_std_mean": 0.09428980499505997, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012560022063553334, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012560022063553334, "signal/frontier_coverage_25/centered_abs_mean": 0.07735366076231003, "signal/frontier_coverage_25/group_std_mean": 0.09949797540903091, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013846305664628744, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013846305664628744, "signal/frontier_coverage_5/centered_abs_mean": 0.15479598343372344, "signal/frontier_coverage_5/group_std_mean": 0.20758814811706544, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002770848013460636, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002770848013460636, "signal/frontier_ece_reward/centered_abs_mean": 0.008560269139707088, "signal/frontier_ece_reward/group_std_mean": 0.011409120261669159, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001070033642463386, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001070033642463386, "step": 185 }, { "calibration/aurc": 0.15022940629833742, "calibration/batch_distribution_entropy": 0.8461052918312866, "calibration/buffer_distribution_entropy": 0.8533362490457753, "calibration/confidence_entropy": 0.44849927036648723, "calibration/coverage@0%": 0.021875, "calibration/coverage@1%": 0.021875, "calibration/coverage@10%": 0.4367375159578275, "calibration/coverage@15%": 0.576325098459507, "calibration/coverage@20%": 0.7084946935173707, "calibration/coverage@25%": 0.9614501740644037, "calibration/coverage@30%": 0.9932291666666668, "calibration/coverage@5%": 0.14235813765516317, "calibration/ece": 0.1212236417116653, "calibration/mean_confidence": 0.6864637556987978, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006249999999999978, "completions/max_length": 3390.8, "completions/max_terminated_length": 3390.8, "completions/mean_length": 811.7388916015625, "completions/mean_terminated_length": 816.8151123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 219.6, "epoch": 0.45599430007124914, "grad_norm": 0.0004526027769315988, "learning_rate": 5.421686746987952e-07, "loss": -0.0038, "num_tokens": 419200972.0, "reward": 1.086756706237793, "reward_std": 0.11896635293960571, "rewards/accuracy_reward": 0.7328124880790711, "rewards/brier_reward": 0.8254269242286683, "rewards/confidence_uniqueness_reward": 0.92621408700943, "rewards/format_reward": 0.99375, "rewards/frontier_aurc_reward": -0.0012867568526417016, "rewards/frontier_coverage_1": 0.01327955424785614, "rewards/frontier_coverage_10": 0.01327955424785614, "rewards/frontier_coverage_15": 0.01672282423824072, "rewards/frontier_coverage_20": 0.0356328509747982, "rewards/frontier_coverage_25": 0.13131238967180253, "rewards/frontier_coverage_5": 0.01327955424785614, "rewards/frontier_ece_reward": 0.004340594261884689, "signal/accuracy_reward/centered_abs_mean": 0.15990668535232544, "signal/accuracy_reward/group_std_mean": 0.21131123900413512, "signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07995334267616272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07995334267616272, "signal/advantage_abs_mean": 0.08612867295742035, "signal/advantage_pre_scale_abs_mean": 0.08612867295742035, "signal/advantage_pre_scale_std": 0.15164274871349334, "signal/advantage_std": 0.15164274871349334, "signal/brier_reward/centered_abs_mean": 0.11984222829341888, "signal/brier_reward/group_std_mean": 0.15931495130062104, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01498027853667736, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01498027853667736, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0317846491932869, "signal/confidence_uniqueness_reward/group_std_mean": 0.04874978512525559, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003973081149160862, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003973081149160862, "signal/format_reward/centered_abs_mean": 0.011197916697710752, "signal/format_reward/group_std_mean": 0.023881056532263755, "signal/format_reward/group_zero_std_frac": 0.8916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005598958348855376, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005598958348855376, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016745397355407477, "signal/frontier_aurc_reward/group_std_mean": 0.003021475113928318, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.997425981448032e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.997425981448032e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1465451642870903, "signal/frontier_coverage_1/group_std_mean": 0.19608235359191895, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002623158413916826, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002623158413916826, "signal/frontier_coverage_10/centered_abs_mean": 0.1465451642870903, "signal/frontier_coverage_10/group_std_mean": 0.19608235359191895, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002623158413916826, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002623158413916826, "signal/frontier_coverage_15/centered_abs_mean": 0.13391998708248137, "signal/frontier_coverage_15/group_std_mean": 0.1800734966993332, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002397167752496898, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002397167752496898, "signal/frontier_coverage_20/centered_abs_mean": 0.062118491530418395, "signal/frontier_coverage_20/group_std_mean": 0.08400460481643676, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011119209812022746, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011119209812022746, "signal/frontier_coverage_25/centered_abs_mean": 0.08528402894735336, "signal/frontier_coverage_25/group_std_mean": 0.11039264798164368, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015265840804204345, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015265840804204345, "signal/frontier_coverage_5/centered_abs_mean": 0.1465451642870903, "signal/frontier_coverage_5/group_std_mean": 0.19608235359191895, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002623158413916826, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002623158413916826, "signal/frontier_ece_reward/centered_abs_mean": 0.00753614604473114, "signal/frontier_ece_reward/group_std_mean": 0.010234573669731618, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009420182555913925, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009420182555913925, "step": 190 }, { "calibration/aurc": 0.14937174985204743, "calibration/batch_distribution_entropy": 0.8768565334857193, "calibration/buffer_distribution_entropy": 0.8564208478703119, "calibration/confidence_entropy": 0.4413662764250832, "calibration/coverage@0%": 0.041348916887709995, "calibration/coverage@1%": 0.08378923519009726, "calibration/coverage@10%": 0.40936488333469506, "calibration/coverage@15%": 0.5909852478554963, "calibration/coverage@20%": 0.6877917325599496, "calibration/coverage@25%": 0.794470757533946, "calibration/coverage@30%": 0.9077784145987675, "calibration/coverage@5%": 0.21624305560365026, "calibration/ece": 0.1298338782403467, "calibration/mean_confidence": 0.6607892197608017, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011111111111111117, "completions/max_length": 3528.2, "completions/max_terminated_length": 3528.2, "completions/mean_length": 832.9720458984375, "completions/mean_terminated_length": 842.4790161132812, "completions/min_length": 0.0, "completions/min_terminated_length": 214.2, "epoch": 0.46799415007312406, "grad_norm": 0.00047915452159941196, "learning_rate": 3.91566265060241e-07, "loss": -0.0095, "num_tokens": 431877674.0, "reward": 1.0597479343414307, "reward_std": 0.12377008944749832, "rewards/accuracy_reward": 0.6863715291023255, "rewards/brier_reward": 0.8096499562263488, "rewards/confidence_uniqueness_reward": 0.9205044031143188, "rewards/format_reward": 0.9888020873069763, "rewards/frontier_aurc_reward": -0.001665916945785284, "rewards/frontier_coverage_1": 0.029197129979729654, "rewards/frontier_coverage_10": 0.029197129979729654, "rewards/frontier_coverage_15": 0.029610903933644295, "rewards/frontier_coverage_20": 0.04197726622223854, "rewards/frontier_coverage_25": 0.1404498651623726, "rewards/frontier_coverage_5": 0.029197129979729654, "rewards/frontier_ece_reward": 0.004466280713677407, "signal/accuracy_reward/centered_abs_mean": 0.1545193150639534, "signal/accuracy_reward/group_std_mean": 0.2042325258255005, "signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0772596575319767, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0772596575319767, "signal/advantage_abs_mean": 0.09082913100719452, "signal/advantage_pre_scale_abs_mean": 0.09082913100719452, "signal/advantage_pre_scale_std": 0.16084725856781007, "signal/advantage_std": 0.16084725856781007, "signal/brier_reward/centered_abs_mean": 0.12776512503623963, "signal/brier_reward/group_std_mean": 0.16529574990272522, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015970640629529954, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015970640629529954, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03753339871764183, "signal/confidence_uniqueness_reward/group_std_mean": 0.056541355699300765, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004691674839705229, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004691674839705229, "signal/format_reward/centered_abs_mean": 0.017822265438735486, "signal/format_reward/group_std_mean": 0.03260133340954781, "signal/format_reward/group_zero_std_frac": 0.8694444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008911132719367743, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008911132719367743, "signal/frontier_aurc_reward/centered_abs_mean": 0.00225935832131654, "signal/frontier_aurc_reward/group_std_mean": 0.004012473439797759, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.044251254526898e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.044251254526898e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1396041989326477, "signal/frontier_coverage_1/group_std_mean": 0.18966357111930848, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024989150697365403, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024989150697365403, "signal/frontier_coverage_10/centered_abs_mean": 0.1396041989326477, "signal/frontier_coverage_10/group_std_mean": 0.18966357111930848, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024989150697365403, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024989150697365403, "signal/frontier_coverage_15/centered_abs_mean": 0.11660263985395432, "signal/frontier_coverage_15/group_std_mean": 0.1599918618798256, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002087187208235264, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002087187208235264, "signal/frontier_coverage_20/centered_abs_mean": 0.060534077882766726, "signal/frontier_coverage_20/group_std_mean": 0.08167311102151871, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010835599503479898, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010835599503479898, "signal/frontier_coverage_25/centered_abs_mean": 0.0967460110783577, "signal/frontier_coverage_25/group_std_mean": 0.12314206212759018, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017317535821348429, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017317535821348429, "signal/frontier_coverage_5/centered_abs_mean": 0.1396041989326477, "signal/frontier_coverage_5/group_std_mean": 0.18966357111930848, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024989150697365403, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024989150697365403, "signal/frontier_ece_reward/centered_abs_mean": 0.007368762884289027, "signal/frontier_ece_reward/group_std_mean": 0.010059486515820027, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009210953605361283, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009210953605361283, "step": 195 }, { "calibration/aurc": 0.1517635493969927, "calibration/batch_distribution_entropy": 0.8032683764135182, "calibration/buffer_distribution_entropy": 0.8594278407178504, "calibration/confidence_entropy": 0.4228073073041322, "calibration/coverage@0%": 0.05823443307656014, "calibration/coverage@1%": 0.05823443307656014, "calibration/coverage@10%": 0.4063801377610362, "calibration/coverage@15%": 0.6229884169428966, "calibration/coverage@20%": 0.6913565883420245, "calibration/coverage@25%": 0.8279600708198709, "calibration/coverage@30%": 0.941512982494816, "calibration/coverage@5%": 0.15745867617066028, "calibration/ece": 0.12641913652799427, "calibration/mean_confidence": 0.7471995840116807, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006770833333333326, "completions/max_length": 3694.8, "completions/max_terminated_length": 3694.8, "completions/mean_length": 800.4954956054687, "completions/mean_terminated_length": 805.9992065429688, "completions/min_length": 0.0, "completions/min_terminated_length": 240.0, "epoch": 0.47999400007499904, "grad_norm": 0.0005283089121803641, "learning_rate": 2.409638554216868e-07, "loss": -0.0058, "num_tokens": 444167190.0, "reward": 1.0729887247085572, "reward_std": 0.11522095501422883, "rewards/accuracy_reward": 0.7047742962837219, "rewards/brier_reward": 0.8240418195724487, "rewards/confidence_uniqueness_reward": 0.9152790069580078, "rewards/format_reward": 0.9932291626930236, "rewards/frontier_aurc_reward": -0.0018710391130298376, "rewards/frontier_coverage_1": 0.030061314441263677, "rewards/frontier_coverage_10": 0.030090765841305257, "rewards/frontier_coverage_15": 0.030872286297380924, "rewards/frontier_coverage_20": 0.04789535701274872, "rewards/frontier_coverage_25": 0.1694835215806961, "rewards/frontier_coverage_5": 0.030061314441263677, "rewards/frontier_ece_reward": 0.004374683182686567, "signal/accuracy_reward/centered_abs_mean": 0.1409125432372093, "signal/accuracy_reward/group_std_mean": 0.18542096316814421, "signal/accuracy_reward/group_zero_std_frac": 0.47222223281860354, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07045627161860465, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07045627161860465, "signal/advantage_abs_mean": 0.08473498374223709, "signal/advantage_pre_scale_abs_mean": 0.08473498374223709, "signal/advantage_pre_scale_std": 0.1543968439102173, "signal/advantage_std": 0.1543968439102173, "signal/brier_reward/centered_abs_mean": 0.11732118874788285, "signal/brier_reward/group_std_mean": 0.15466432571411132, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014665148593485356, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014665148593485356, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.035984426736831665, "signal/confidence_uniqueness_reward/group_std_mean": 0.054205088317394255, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004498053342103958, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004498053342103958, "signal/format_reward/centered_abs_mean": 0.012174479011446238, "signal/format_reward/group_std_mean": 0.02487517409026623, "signal/format_reward/group_zero_std_frac": 0.8916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006087239505723119, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006087239505723119, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023178313160315154, "signal/frontier_aurc_reward/group_std_mean": 0.004119249107316136, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.148917651036754e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.148917651036754e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.10867589861154556, "signal/frontier_coverage_1/group_std_mean": 0.15358475148677825, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019452984910458327, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019452984910458327, "signal/frontier_coverage_10/centered_abs_mean": 0.10854685455560684, "signal/frontier_coverage_10/group_std_mean": 0.15342499017715455, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019429885549470782, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019429885549470782, "signal/frontier_coverage_15/centered_abs_mean": 0.08157736957073211, "signal/frontier_coverage_15/group_std_mean": 0.11773104816675187, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014602348441258074, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014602348441258074, "signal/frontier_coverage_20/centered_abs_mean": 0.052034994959831236, "signal/frontier_coverage_20/group_std_mean": 0.07054910808801651, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000931426405441016, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000931426405441016, "signal/frontier_coverage_25/centered_abs_mean": 0.10885821878910065, "signal/frontier_coverage_25/group_std_mean": 0.13929919749498368, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001948562078177929, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001948562078177929, "signal/frontier_coverage_5/centered_abs_mean": 0.10867589861154556, "signal/frontier_coverage_5/group_std_mean": 0.15358475148677825, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019452984910458327, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019452984910458327, "signal/frontier_ece_reward/centered_abs_mean": 0.006094504240900278, "signal/frontier_ece_reward/group_std_mean": 0.00841012941673398, "signal/frontier_ece_reward/group_zero_std_frac": 0.008333333395421505, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007618130301125347, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007618130301125347, "step": 200 }, { "epoch": 0.47999400007499904, "eval_calibration/aurc": 0.20361683297711194, "eval_calibration/batch_distribution_entropy": 0.7910456665612545, "eval_calibration/buffer_distribution_entropy": 0.8607564757055215, "eval_calibration/confidence_entropy": 0.40042139560153805, "eval_calibration/coverage@0%": 0.1213037634408602, "eval_calibration/coverage@1%": 0.1213037634408602, "eval_calibration/coverage@10%": 0.1743951612903226, "eval_calibration/coverage@15%": 0.46135752688172044, "eval_calibration/coverage@20%": 0.7064852150537634, "eval_calibration/coverage@25%": 0.8635752688172044, "eval_calibration/coverage@30%": 0.9474126344086021, "eval_calibration/coverage@5%": 0.1213037634408602, "eval_calibration/ece": 0.17280676072345136, "eval_calibration/mean_confidence": 0.7212002863951991, "eval_completions/clipped_ratio": 0.005208333333333352, "eval_completions/max_length": 2564.1666666666665, "eval_completions/max_terminated_length": 2564.1666666666665, "eval_completions/mean_length": 822.8699951171875, "eval_completions/mean_terminated_length": 827.223876953125, "eval_completions/min_length": 89.33333333333333, "eval_completions/min_terminated_length": 267.0, "eval_loss": 0.0, "eval_num_tokens": 444167190.0, "eval_reward": 1.059295157591502, "eval_reward_std": 0.2571010912458102, "eval_rewards/accuracy_reward": 0.6935763955116272, "eval_rewards/brier_reward": 0.8161595165729523, "eval_rewards/confidence_uniqueness_reward": 0.8582899471124014, "eval_rewards/format_reward": 0.9921875099341074, "eval_rewards/frontier_aurc_reward": -0.0019197222621490557, "eval_rewards/frontier_coverage_1": 0.0351586788892746, "eval_rewards/frontier_coverage_10": 0.035206587674717106, "eval_rewards/frontier_coverage_15": 0.03441356122493744, "eval_rewards/frontier_coverage_20": 0.052705912540356316, "eval_rewards/frontier_coverage_25": 0.17523299405972162, "eval_rewards/frontier_coverage_5": 0.0351586788892746, "eval_rewards/frontier_ece_reward": 0.004450828962338467, "eval_runtime": 198.5455, "eval_samples_per_second": 5.037, "eval_signal/accuracy_reward/centered_abs_mean": 0.41162109375, "eval_signal/accuracy_reward/group_std_mean": 0.4593026836713155, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.205810546875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.205810546875, "eval_signal/advantage_abs_mean": 0.22099632769823074, "eval_signal/advantage_pre_scale_abs_mean": 0.22099632769823074, "eval_signal/advantage_pre_scale_std": 0.2554586206873258, "eval_signal/advantage_std": 0.2554586206873258, "eval_signal/brier_reward/centered_abs_mean": 0.21053502460320792, "eval_signal/brier_reward/group_std_mean": 0.2717415342728297, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02631687807540099, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02631687807540099, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06546468411882718, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0933909999827544, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008183085514853397, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008183085514853397, "eval_signal/format_reward/centered_abs_mean": 0.015136718594779571, "eval_signal/format_reward/group_std_mean": 0.04419417337824901, "eval_signal/format_reward/group_zero_std_frac": 0.750000019868215, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.007568359297389786, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359297389786, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0032751309336163104, "eval_signal/frontier_aurc_reward/group_std_mean": 0.007060678792186081, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.862484310152164e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.862484310152164e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.1731132542093595, "eval_signal/frontier_coverage_1/group_std_mean": 0.2936793069044749, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030987270874902606, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030987270874902606, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.17259238163630167, "eval_signal/frontier_coverage_10/group_std_mean": 0.2929122944672902, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003089403461975356, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003089403461975356, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.11876154070099194, "eval_signal/frontier_coverage_15/group_std_mean": 0.212093619008859, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002125831456699719, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002125831456699719, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.07741224020719528, "eval_signal/frontier_coverage_20/group_std_mean": 0.10848981390396754, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001385679099864016, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001385679099864016, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.22898491968711218, "eval_signal/frontier_coverage_25/group_std_mean": 0.27611127495765686, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0040988298909117775, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0040988298909117775, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.1731132542093595, "eval_signal/frontier_coverage_5/group_std_mean": 0.2936793069044749, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030987270874902606, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030987270874902606, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.008298173546791077, "eval_signal/frontier_ece_reward/group_std_mean": 0.013287559927751621, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010372716933488846, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010372716933488846, "eval_steps_per_second": 0.03, "step": 200 }, { "calibration/aurc": 0.15245076106115, "calibration/batch_distribution_entropy": 0.7577490647981495, "calibration/buffer_distribution_entropy": 0.8615814010175041, "calibration/confidence_entropy": 0.370367604405363, "calibration/coverage@0%": 0.00835509138381201, "calibration/coverage@1%": 0.00835509138381201, "calibration/coverage@10%": 0.2857309478647174, "calibration/coverage@15%": 0.7050908432970091, "calibration/coverage@20%": 0.8035156789611826, "calibration/coverage@25%": 0.8801878712529355, "calibration/coverage@30%": 0.961588617212322, "calibration/coverage@5%": 0.09116869360084283, "calibration/ece": 0.12256849527723586, "calibration/mean_confidence": 0.7609970584329514, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00659722222222221, "completions/max_length": 3460.2, "completions/max_terminated_length": 3460.2, "completions/mean_length": 824.7370727539062, "completions/mean_terminated_length": 830.19814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 214.4, "epoch": 0.491993850076874, "grad_norm": 0.000395325681893155, "learning_rate": 9.036144578313253e-08, "loss": -0.0053, "num_tokens": 456734113.0, "reward": 1.1002012491226196, "reward_std": 0.11294516026973725, "rewards/accuracy_reward": 0.7552083373069763, "rewards/brier_reward": 0.8410086750984191, "rewards/confidence_uniqueness_reward": 0.9115934729576111, "rewards/format_reward": 0.9934027910232544, "rewards/frontier_aurc_reward": -0.0013322666753083467, "rewards/frontier_coverage_1": 0.01617803443223238, "rewards/frontier_coverage_10": 0.016358466073870658, "rewards/frontier_coverage_15": 0.022982559585943817, "rewards/frontier_coverage_20": 0.05930071547627449, "rewards/frontier_coverage_25": 0.22371198534965514, "rewards/frontier_coverage_5": 0.01617803443223238, "rewards/frontier_ece_reward": 0.003959867171943188, "signal/accuracy_reward/centered_abs_mean": 0.1392578125, "signal/accuracy_reward/group_std_mean": 0.1875597894191742, "signal/accuracy_reward/group_zero_std_frac": 0.44722222685813906, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06962890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06962890625, "signal/advantage_abs_mean": 0.07913011610507965, "signal/advantage_pre_scale_abs_mean": 0.07913011610507965, "signal/advantage_pre_scale_std": 0.15015988945960998, "signal/advantage_std": 0.15015988945960998, "signal/brier_reward/centered_abs_mean": 0.11419829726219177, "signal/brier_reward/group_std_mean": 0.1516292631626129, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014274787157773972, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014274787157773972, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03720296248793602, "signal/confidence_uniqueness_reward/group_std_mean": 0.058379728347063065, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004650370310992002, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004650370310992002, "signal/format_reward/centered_abs_mean": 0.012358940858393907, "signal/format_reward/group_std_mean": 0.028991687297821044, "signal/format_reward/group_zero_std_frac": 0.8611111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006179470429196953, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006179470429196953, "signal/frontier_aurc_reward/centered_abs_mean": 0.001682829950004816, "signal/frontier_aurc_reward/group_std_mean": 0.0030279669910669325, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.012265406141523e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.012265406141523e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.11965394765138626, "signal/frontier_coverage_1/group_std_mean": 0.16320410072803498, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021418056450784205, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021418056450784205, "signal/frontier_coverage_10/centered_abs_mean": 0.11909585893154144, "signal/frontier_coverage_10/group_std_mean": 0.16247594058513642, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002131815906614065, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002131815906614065, "signal/frontier_coverage_15/centered_abs_mean": 0.08062577843666077, "signal/frontier_coverage_15/group_std_mean": 0.11165858805179596, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014432014198973776, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014432014198973776, "signal/frontier_coverage_20/centered_abs_mean": 0.05757641866803169, "signal/frontier_coverage_20/group_std_mean": 0.07493609786033631, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010306178824976086, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010306178824976086, "signal/frontier_coverage_25/centered_abs_mean": 0.1128468781709671, "signal/frontier_coverage_25/group_std_mean": 0.1470080941915512, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020199591061100365, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020199591061100365, "signal/frontier_coverage_5/centered_abs_mean": 0.11965394765138626, "signal/frontier_coverage_5/group_std_mean": 0.16320410072803498, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021418056450784205, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021418056450784205, "signal/frontier_ece_reward/centered_abs_mean": 0.006321498658508062, "signal/frontier_ece_reward/group_std_mean": 0.008466892875730991, "signal/frontier_ece_reward/group_zero_std_frac": 0.008333333395421505, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007901873323135078, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007901873323135078, "step": 205 }, { "calibration/aurc": 0.09685634602876492, "calibration/batch_distribution_entropy": 0.7791143614888557, "calibration/buffer_distribution_entropy": 0.8611139279672693, "calibration/confidence_entropy": 0.38429371665391043, "calibration/coverage@0%": 0.022687609075043632, "calibration/coverage@1%": 0.022687609075043632, "calibration/coverage@10%": 0.618178636717365, "calibration/coverage@15%": 0.781235178457132, "calibration/coverage@20%": 0.8722135499408289, "calibration/coverage@25%": 0.9379210469362288, "calibration/coverage@30%": 0.981675392670157, "calibration/coverage@5%": 0.44410983252519953, "calibration/ece": 0.0883028476897068, "calibration/mean_confidence": 0.7646092393666405, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005353009259259263, "completions/max_length": 3607.0, "completions/max_terminated_length": 3607.0, "completions/mean_length": 825.1263020833334, "completions/mean_terminated_length": 829.6256917317709, "completions/min_length": 0.0, "completions/min_terminated_length": 193.33333333333334, "epoch": 0.49919376007799904, "num_tokens": 464303434.0, "reward": 1.0698885917663574, "reward_std": 0.1222114438811938, "rewards/accuracy_reward": 0.6950231591860453, "rewards/brier_reward": 0.8242372075716654, "rewards/confidence_uniqueness_reward": 0.9139418800671896, "rewards/format_reward": 0.9945023059844971, "rewards/frontier_aurc_reward": -0.001452996317918102, "rewards/frontier_coverage_1": 0.03662515555818876, "rewards/frontier_coverage_10": 0.03657694533467293, "rewards/frontier_coverage_15": 0.03316311786572138, "rewards/frontier_coverage_20": 0.06067184483011564, "rewards/frontier_coverage_25": 0.20710508028666177, "rewards/frontier_coverage_5": 0.03662515555818876, "rewards/frontier_ece_reward": 0.00421436270698905, "signal/accuracy_reward/centered_abs_mean": 0.15033637235562006, "signal/accuracy_reward/group_std_mean": 0.2045428305864334, "signal/accuracy_reward/group_zero_std_frac": 0.40740742286046344, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07516818617781003, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07516818617781003, "signal/advantage_abs_mean": 0.08676933993895848, "signal/advantage_pre_scale_abs_mean": 0.08676933993895848, "signal/advantage_pre_scale_std": 0.15460805098215738, "signal/advantage_std": 0.15460805098215738, "signal/brier_reward/centered_abs_mean": 0.125543013215065, "signal/brier_reward/group_std_mean": 0.16625908513863882, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015692876651883125, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015692876651883125, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03518642236789068, "signal/confidence_uniqueness_reward/group_std_mean": 0.05552714318037033, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004398302795986335, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004398302795986335, "signal/format_reward/centered_abs_mean": 0.0103804978231589, "signal/format_reward/group_std_mean": 0.026168825725714367, "signal/format_reward/group_zero_std_frac": 0.8657407363255819, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00519024891157945, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00519024891157945, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019372629079346855, "signal/frontier_aurc_reward/group_std_mean": 0.0035577377614875636, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.46770048054168e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.46770048054168e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12492906053860982, "signal/frontier_coverage_1/group_std_mean": 0.17234125236670175, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022362301436563334, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022362301436563334, "signal/frontier_coverage_10/centered_abs_mean": 0.12409953524669011, "signal/frontier_coverage_10/group_std_mean": 0.17126783728599548, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002221381369357308, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002221381369357308, "signal/frontier_coverage_15/centered_abs_mean": 0.07909848541021347, "signal/frontier_coverage_15/group_std_mean": 0.11092762400706609, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00141586281824857, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00141586281824857, "signal/frontier_coverage_20/centered_abs_mean": 0.05819156641761462, "signal/frontier_coverage_20/group_std_mean": 0.0763649841149648, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010416289248193304, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010416289248193304, "signal/frontier_coverage_25/centered_abs_mean": 0.12411411106586456, "signal/frontier_coverage_25/group_std_mean": 0.16261087854703268, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002221642527729273, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002221642527729273, "signal/frontier_coverage_5/centered_abs_mean": 0.12492906053860982, "signal/frontier_coverage_5/group_std_mean": 0.17234125236670175, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022362301436563334, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022362301436563334, "signal/frontier_ece_reward/centered_abs_mean": 0.006716146133840084, "signal/frontier_ece_reward/group_std_mean": 0.009008504450321198, "signal/frontier_ece_reward/group_zero_std_frac": 0.004629629664123058, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008395182667300105, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008395182667300105, "step": 208, "total_flos": 0.0, "train_loss": -0.01026152794320996, "train_runtime": 40759.4999, "train_samples_per_second": 0.368, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 208, "num_input_tokens_seen": 464303434, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }